This document is to provide some plotting examples for reference.
The package usmap contains maps of US states and counties. There is also some associated data available about state and county demographics.
Example code includes:
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.4
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ---------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Population by county
data(countypop, package="usmap")
# Population by state
data(statepop, package="usmap")
# Poverty rate by county
data(countypov, package="usmap")
# Poverty rate by state
data(statepov, package="usmap")
# Population of largest city by state
data(citypop, package="usmap")
# Location of earthquakes
data(earthquakes, package="usmap")
# Included datasets
countypop
## # A tibble: 3,142 x 4
## fips abbr county pop_2015
## <chr> <chr> <chr> <dbl>
## 1 01001 AL Autauga County 55347
## 2 01003 AL Baldwin County 203709
## 3 01005 AL Barbour County 26489
## 4 01007 AL Bibb County 22583
## 5 01009 AL Blount County 57673
## 6 01011 AL Bullock County 10696
## 7 01013 AL Butler County 20154
## 8 01015 AL Calhoun County 115620
## 9 01017 AL Chambers County 34123
## 10 01019 AL Cherokee County 25859
## # ... with 3,132 more rows
statepop
## # A tibble: 51 x 4
## fips abbr full pop_2015
## <chr> <chr> <chr> <dbl>
## 1 01 AL Alabama 4858979
## 2 02 AK Alaska 738432
## 3 04 AZ Arizona 6828065
## 4 05 AR Arkansas 2978204
## 5 06 CA California 39144818
## 6 08 CO Colorado 5456574
## 7 09 CT Connecticut 3590886
## 8 10 DE Delaware 945934
## 9 11 DC District of Columbia 672228
## 10 12 FL Florida 20271272
## # ... with 41 more rows
countypov
## # A tibble: 3,142 x 4
## fips abbr county pct_pov_2014
## <chr> <chr> <chr> <dbl>
## 1 01001 AL Autauga County 13.1
## 2 01003 AL Baldwin County 13
## 3 01005 AL Barbour County 25.4
## 4 01007 AL Bibb County 18.1
## 5 01009 AL Blount County 17.5
## 6 01011 AL Bullock County 35.1
## 7 01013 AL Butler County 25
## 8 01015 AL Calhoun County 20.5
## 9 01017 AL Chambers County 21.3
## 10 01019 AL Cherokee County 18.6
## # ... with 3,132 more rows
statepov
## # A tibble: 51 x 4
## fips abbr full pct_pov_2014
## <chr> <chr> <chr> <dbl>
## 1 01 AL Alabama 19.2
## 2 02 AK Alaska 11.4
## 3 04 AZ Arizona 18.2
## 4 05 AR Arkansas 18.7
## 5 06 CA California 16.4
## 6 08 CO Colorado 12.1
## 7 09 CT Connecticut 10.8
## 8 10 DE Delaware 13
## 9 11 DC District of Columbia 18.4
## 10 12 FL Florida 16.6
## # ... with 41 more rows
citypop
## # A tibble: 51 x 6
## lon lat state abbr most_populous_city city_pop
## <dbl> <dbl> <chr> <chr> <chr> <dbl>
## 1 -86.8 33.6 Alabama AL Birmingham 212237
## 2 -150. 61.2 Alaska AK Anchorage 291826
## 3 -112. 33.4 Arizona AZ Phoenix 1445632
## 4 -92.3 34.7 Arkansas AR Little Rock 193524
## 5 -118. 34.0 California CA Los Angeles 3792621
## 6 -105. 39.8 Colorado CO Denver 600158
## 7 -73.2 41.2 Connecticut CT Bridgeport 144229
## 8 -75.6 39.8 Delaware DE Wilmington 70851
## 9 -77.0 38.9 District of Columbia DC Washington 693972
## 10 -81.7 30.3 Florida FL Jacksonville 880619
## # ... with 41 more rows
earthquakes
## # A tibble: 2,254 x 3
## lon lat mag
## <dbl> <dbl> <dbl>
## 1 -118. 35.7 2.79
## 2 -118. 36.3 3.1
## 3 -118. 36.2 2.74
## 4 -125. 40.4 2.77
## 5 -118. 35.7 2.59
## 6 -118. 35.7 2.82
## 7 -118. 35.7 2.78
## 8 -124. 40.3 3.42
## 9 -124. 41.7 2.5
## 10 -119. 35.3 2.58
## # ... with 2,244 more rows
# Basic, empty US maps
usmap::plot_usmap(regions="states")
usmap::plot_usmap(regions="counties")
# Basic, empty US maps subsetted to an area
usmap::plot_usmap(regions="states",
include=c("WA", "OR", "CA", "NV", "ID", "MT", "WY", "UT", "CO", "AZ", "NM")
)
usmap::plot_usmap(regions="counties", include=c("MN", "WI", "MI", "OH", "PA", "NY", "IN", "IL"))
# Basic, subsetted state map with poverty rates included
usmap::plot_usmap(regions="states",
include=c("WA", "OR", "CA", "NV", "ID", "MT", "WY", "UT", "CO", "AZ", "NM"),
values="pct_pov_2014", data=statepov, labels=TRUE
) +
scale_fill_continuous(low="lightblue", high="darkblue", "Poverty Rate (%)") +
labs(title="Poverty Rates by Western and Mountain States")
# Basic, subsetted county map with poverty rates included
usmap::plot_usmap(regions="counties", include=c("MN", "WI", "MI", "OH", "PA", "NY", "IN", "IL"),
values="pct_pov_2014", data=countypov, labels=FALSE
) +
scale_fill_continuous(low="lightblue", high="darkblue", "Poverty Rate (%)") +
labs(title="Poverty Rates by County in Great Lakes States")
The latitude and longitude data can be converted to a form suitable for usmap by using the usmap_transform function.
Example code includes:
# Transform the earthquakes data
trQuakes <- usmap::usmap_transform(earthquakes)
str(trQuakes)
## 'data.frame': 2254 obs. of 5 variables:
## $ lon : num -118 -118 -118 -125 -118 ...
## $ lat : num 35.7 36.3 36.2 40.4 35.7 ...
## $ mag : num 2.79 3.1 2.74 2.77 2.59 2.82 2.78 3.42 2.5 2.58 ...
## $ lon.1: num -1575319 -1632293 -1595342 -2065325 -1574008 ...
## $ lat.1: num -872720 -785212 -811800 -197614 -867723 ...
# Add as a layer to the state map
usmap::plot_usmap(regions="states") +
geom_point(data=trQuakes, aes(x=lon.1, y=lat.1, size=mag), alpha=0.4) +
labs(title="Earthquakes of Magnitude 2.5+ (H1 2019)")
# Transform the largest city data
trCity <- usmap::usmap_transform(citypop)
str(trCity)
## 'data.frame': 51 obs. of 8 variables:
## $ lon : num -86.8 -112.1 -92.3 -118.2 -104.9 ...
## $ lat : num 33.6 33.5 34.7 34 39.8 ...
## $ state : chr "Alabama" "Arizona" "Arkansas" "California" ...
## $ abbr : chr "AL" "AZ" "AR" "CA" ...
## $ most_populous_city: chr "Birmingham" "Phoenix" "Little Rock" "Los Angeles" ...
## $ city_pop : num 212237 1445632 193524 3792621 600158 ...
## $ lon.1 : num 1220905 -1120927 702478 -1673156 -417275 ...
## $ lat.1 : num -1165156 -1202575 -1107534 -1034842 -570173 ...
# Add as a layer to the state map
usmap::plot_usmap(regions="states") +
geom_point(data=trCity, aes(x=lon.1, y=lat.1, size=city_pop)) +
labs(title="Largest City by State")
The census region definitions are included, and can be used to filter or color the maps.
Example code includes:
# Filter the map to include only new_england, mid_atlantic, and south_atlantic
usmap::plot_usmap(regions="states",
include=c(usmap::.new_england, usmap::.mid_atlantic, usmap::.south_atlantic)
)
# Create regions data for US states
regionData <- usmap::statepop %>%
mutate(region=as.factor(ifelse(abbr %in% usmap::.midwest_region, 1, 0)))
usmap::plot_usmap(regions="states", data=regionData, values="region") +
scale_fill_discrete("Midwest") +
labs(title="Midwest Region US States")
# Enhanced Coloring and Labelling
usmap::plot_usmap(regions="states", data=regionData, values="region") +
scale_fill_manual(values=c("lightgray", "lightblue"), "", labels=c("Other", "Midwest")) +
labs(title="Midwest Region US States")
Since usmap is built on ggplot2, the standard techniques from ggplot2 can be used to enhance the geography labelling. Further, centroids for the geographies are available in loadable files.
Example code includes:
# Base state map labelled with defaults
usmap::plot_usmap(regions="states", labels=TRUE, label_color="red")
# Base county map labelled with defaults
usmap::plot_usmap(regions="counties", include=c("TX", "OK"), labels=TRUE, label_color="grey")
# Load state centroid data
stCenter <- utils::read.csv(system.file("extdata",
paste0("us_", "states", "_centroids.csv"), package = "usmap"
),
colClasses = c(rep("numeric", 2), rep("character", 3)), stringsAsFactors = FALSE
)
# Load county centroid data
ctCenter <- utils::read.csv(system.file("extdata",
paste0("us_", "counties", "_centroids.csv"), package = "usmap"
),
colClasses = c(rep("numeric", 2), rep("character", 4)), stringsAsFactors = FALSE
)
# Add state labels using geom_text
regionData <- usmap::statepop %>%
mutate(region=as.factor(ifelse(abbr %in% usmap::.midwest_region, 1, 0))) %>%
left_join(stCenter %>% select(x, y, full, fips) %>% rename(fname=full)) %>%
mutate(fname=ifelse(fname=="District of Columbia", "DC", str_replace_all(fname, " ", "\n")))
## Joining, by = "fips"
usmap::plot_usmap(regions="states", data=regionData[, c("fips", "region")], values="region") +
scale_fill_manual(values=c("lightgray", "lightblue"), "", labels=c("Other", "Midwest")) +
labs(title="Midwest Region US States") +
geom_text(data=filter(regionData, region==1), aes(x=x, y=y, label=fname), size=2.5)
# Add county labels using geom_text
regionData <- usmap::countypop %>%
mutate(region=as.factor(case_when(abbr=="OK" ~ 1, abbr=="TX" ~ 2, TRUE ~ 0))) %>%
left_join(ctCenter %>% select(x, y, county, fips) %>% rename(cname=county)) %>%
mutate(cname=str_replace_all(str_replace(cname, " County", ""), " ", "\n"))
## Joining, by = "fips"
usmap::plot_usmap(regions="counties", include=c("TX", "OK"),
data=regionData[, c("fips", "region")], values="region") +
scale_fill_manual(values=c("red", "orange"), "", labels=c("Oklahoma", "Texas")) +
labs(title="Texas and Oklahoma Counties") +
geom_text(data=filter(regionData, abbr %in% c("TX", "OK")),
aes(x=x, y=y, label=cname), size=2.5,
color=ifelse(pull(filter(regionData, abbr %in% c("TX", "OK")), abbr)=="OK", "white", "black")
)
Separate data exists for key population centers, which can be loaded and then added to maps.
Example code includes:
# Transform the largest city data
str(maps::us.cities)
## 'data.frame': 1005 obs. of 6 variables:
## $ name : chr "Abilene TX" "Akron OH" "Alameda CA" "Albany GA" ...
## $ country.etc: chr "TX" "OH" "CA" "GA" ...
## $ pop : int 113888 206634 70069 75510 93576 45535 494962 44933 127159 88857 ...
## $ lat : num 32.5 41.1 37.8 31.6 42.7 ...
## $ long : num -99.7 -81.5 -122.3 -84.2 -73.8 ...
## $ capital : int 0 0 0 0 2 0 0 0 0 0 ...
trCity <- usmap::usmap_transform(select(maps::us.cities, long, lat, everything())) %>%
mutate(useName=str_replace_all(str_sub(name, 1, -4), " ", "\n"))
str(trCity)
## 'data.frame': 1005 obs. of 9 variables:
## $ long : num -99.7 -81.5 -122.3 -84.2 -73.8 ...
## $ lat : num 32.5 41.1 37.8 31.6 42.7 ...
## $ name : chr "Abilene TX" "Akron OH" "Alameda CA" "Albany GA" ...
## $ country.etc: chr "TX" "OH" "CA" "GA" ...
## $ pop : int 113888 206634 70069 75510 93576 45535 494962 44933 127159 88857 ...
## $ capital : int 0 0 0 0 2 0 0 0 0 0 ...
## $ long.1 : num 24544 1533716 -1931842 1498524 2096820 ...
## $ lat.1 : num -1392670 -262400 -543195 -1350290 82431 ...
## $ useName : chr "Abilene" "Akron" "Alameda" "Albany" ...
# Define a key region for plotting
rgnPlot <- c(usmap::.west_south_central, usmap::.east_south_central)
popFilter <- 100000
# Add cities as a layer to the state map
usmap::plot_usmap(regions="states", include=rgnPlot, fill="lightblue") +
labs(title=paste0("South Central Cities with Population >= ", popFilter/1000, "k")) +
geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot),
aes(x=long.1, y=lat.1, size=pop), alpha=0.5
)
# Plot the full nation for cities of 250k +
rgnPlot <- c(usmap::.midwest_region, usmap::.northeast_region,
usmap::.south_region, usmap::.west_region
)
popFilter <- 250000
# Add cities as a layer to the state map
usmap::plot_usmap(regions="states", include=rgnPlot, fill="lightblue") +
labs(title=paste0("US Cities with Population >= ", popFilter/1000, "k")) +
geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot),
aes(x=long.1, y=lat.1, size=pop), alpha=0.5
)
# Plot cities by name for the Four Corners region
rgnPlot <- c("UT", "CO", "NM", "AZ")
popFilter <- 125000
# Add cities as a layer to the state map (points)
usmap::plot_usmap(regions="counties", include=rgnPlot, fill="lightblue") +
labs(title=paste0("Four Corners Cities with Population >= ", popFilter/1000, "k")) +
geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot),
aes(x=long.1, y=lat.1, size=pop), alpha=0.5
)
# Add cities as a layer to the state map (text)
usmap::plot_usmap(regions="counties", include=rgnPlot, fill="lightblue") +
labs(title=paste0("Four Corners Cities with Population >= ", popFilter/1000, "k")) +
geom_text(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot),
aes(x=long.1, y=lat.1, size=pop, label=useName)
)
popFilter <- 50000
popFilter2 <- 250000
# Add cities as a layer to the state map (points and text)
usmap::plot_usmap(regions="counties", include=rgnPlot, fill="lightblue") +
labs(title=paste0("Four Corners Cities with Population >= ", popFilter/1000, "k")) +
geom_point(data=filter(trCity, pop >= popFilter, country.etc %in% rgnPlot),
aes(x=long.1, y=lat.1, size=pop), alpha=0.5
) +
geom_text(data=filter(trCity, pop >= popFilter2, country.etc %in% rgnPlot),
aes(x=long.1, y=lat.1, size=pop, label=useName), color="red", show.legend=FALSE
)
Using scale_fill_manual(), custom colors can be created by geography.
Example code includes:
# Basic county population map with continuous colors
usmap::countypop %>%
filter(abbr %in% c("OH", "IN", "KY")) %>%
mutate(pop=pop_2015/1000, name=str_replace(str_replace(county, " County", ""), " ", "\n")) %>%
usmap::plot_usmap(regions="counties", include=c("OH", "IN", "KY"), data=., values="pop") +
scale_fill_continuous(low="lightblue", high="darkblue", "Pop. (000)") +
labs(title="Indiana, Ohio, and Kentucky - Population by County")
# Custom county population map with colors - red for Indiana, blue for Kentucky, grey for Ohio
popBucket <- c(0, 100, 500)
popLabels <- sapply(1:(length(popBucket)-1), FUN=function(x){paste0(popBucket[x], "-", popBucket[x+1])})
popLabels <- c(popLabels, paste0(popBucket[length(popBucket)], "+"))
guideLabels <- paste(rep(c("OH", "KY", "IN"), each=3), popLabels)
usmap::countypop %>%
filter(abbr %in% c("OH", "KY", "IN")) %>%
mutate(pop=pop_2015/1000, name=str_replace(str_replace(county, " County", ""), " ", "\n"),
pBucket=findInterval(pop, popBucket),
pColor=rgb(abbr=="IN", 0, abbr=="KY", pBucket/length(popBucket))
) %>%
usmap::plot_usmap(regions="counties", include=c("OH", "IN", "KY"), data=., values="pColor") +
scale_fill_identity(guide="legend", "Pop. (000)", labels=guideLabels) +
labs(title="Indiana, Ohio, and Kentucky - Population by County") +
theme(legend.position = "bottom") +
guides(fill=guide_legend(nrow=3))
# Custom county poverty rate map with colors - red for Indiana, blue for Kentucky, grey for Ohio
povBucket <- c(0, 15, 30)
povLabels <- sapply(1:(length(povBucket)-1), FUN=function(x){paste0(povBucket[x], "-", povBucket[x+1])})
povLabels <- c(povLabels, paste0(povBucket[length(povBucket)], "+"))
guideLabels <- paste(rep(c("OH", "KY", "IN"), each=3), povLabels)
usmap::countypov %>%
filter(abbr %in% c("OH", "KY", "IN")) %>%
mutate(name=str_replace(str_replace(county, " County", ""), " ", "\n"),
pBucket=findInterval(pct_pov_2014, povBucket),
pColor=rgb(abbr=="IN", 0, abbr=="KY", pBucket/length(povBucket))
) %>%
usmap::plot_usmap(regions="counties", include=c("OH", "IN", "KY"), data=., values="pColor") +
scale_fill_identity(guide="legend", "Poverty Rate (%)", labels=guideLabels) +
labs(title="Indiana, Ohio, and Kentucky - Poverty Rate by County") +
theme(legend.position = "bottom") +
guides(fill=guide_legend(nrow=3))
The above techniques can be combined for custom labeling of key geographies.
Example code includes:
# Basic state population data
stateData <- usmap::statepop %>%
mutate(pop=round(pop_2015/1000000, 1),
name=ifelse(full=="District of Columbia", "DC", str_replace(full, " ", "\n")),
lab=paste0(abbr, "\n(", pop, ")\n")
)
# Load state centroid data
stCenter <- utils::read.csv(system.file("extdata",
paste0("us_", "states", "_centroids.csv"), package = "usmap"
),
colClasses = c(rep("numeric", 2), rep("character", 3)), stringsAsFactors = FALSE
)
# Grab centroids for top 5 states
top5State <- stateData %>%
top_n(5, pop) %>%
left_join(select(stCenter, x, y, fips))
## Joining, by = "fips"
# Plot state population with continuous colors and custom labels
stateData %>%
usmap::plot_usmap(regions="states", data=., values="pop") +
scale_fill_continuous(low="lightblue", high="darkblue", "Pop. (millions)") +
labs(title="Population by State", subtitle="Top 5 in millions") +
geom_text(data=top5State, aes(x=x, y=y, label=lab), color="white", size=4, fontface="bold")
# Load county centroid data
ctCenter <- utils::read.csv(system.file("extdata",
paste0("us_", "counties", "_centroids.csv"), package = "usmap"
),
colClasses = c(rep("numeric", 2), rep("character", 4)), stringsAsFactors = FALSE
)
# Custom county population map with colors - red for Wisconsin, blue for Michigan
popBucket <- c(0, 100, 500)
popLabels <- sapply(1:(length(popBucket)-1), FUN=function(x){paste0(popBucket[x], "-", popBucket[x+1])})
popLabels <- c(popLabels, paste0(popBucket[length(popBucket)], "+"))
guideLabels <- paste(rep(c("MI", "WI"), each=3), popLabels)
# Grab county data for counties exceeding the top popBucket
ctyData <- usmap::countypop %>%
filter(abbr %in% c("MI", "WI")) %>%
mutate(pop=round(pop_2015/1000, 0),
name=str_replace(str_replace(county, " County", ""), " ", "\n"),
lab=paste0(name, "\n(", pop, ")\n")
)
topCounty <- ctyData %>%
filter(pop >= max(popBucket)) %>%
left_join(select(ctCenter, x, y, fips))
## Joining, by = "fips"
# Create county population map
usmap::countypop %>%
filter(abbr %in% c("MI", "WI")) %>%
mutate(pop=pop_2015/1000, name=str_replace(str_replace(county, " County", ""), " ", "\n"),
pBucket=findInterval(pop, popBucket),
pColor=rgb(abbr=="WI", 0, abbr=="MI", pBucket/length(popBucket))
) %>%
usmap::plot_usmap(regions="counties", include=c("WI", "MI"), data=., values="pColor") +
scale_fill_identity(guide="legend", "Pop. (000)", labels=guideLabels) +
geom_text(data=topCounty, aes(x=x, y=y, label=lab), size=3, fontface="bold", color="white") +
labs(title="Michigan and Wisconsin - Population by County", subtitle="Labelled Pop. (000) for 500k+") +
theme(legend.position = "bottom") +
guides(fill=guide_legend(nrow=3)) +
theme(panel.background=element_rect(color="black", fill="lightgrey"))
The ggridges package has weather data for Lincoln, NE in the data file ‘lincoln_weather’. The data are captured once per day for 366 days of 2016. Simple plots can be made of the average temperatures and dew points.
Example code includes:
data(lincoln_weather, package="ggridges")
str(lincoln_weather, give.attr=FALSE)
## Classes 'tbl_df', 'tbl' and 'data.frame': 366 obs. of 24 variables:
## $ CST : chr "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
## $ Max Temperature [F] : int 37 41 37 30 38 34 33 28 22 31 ...
## $ Mean Temperature [F] : int 24 23 23 17 29 33 30 25 9 11 ...
## $ Min Temperature [F] : int 11 5 8 4 19 32 27 22 -4 -9 ...
## $ Max Dew Point [F] : int 19 22 23 24 29 33 32 25 17 20 ...
## $ Mean Dew Point [F] : int 13 14 15 13 25 32 30 22 4 5 ...
## $ Min Dewpoint [F] : int 8 4 8 2 19 29 25 18 -8 -13 ...
## $ Max Humidity : int 88 100 92 92 96 100 100 92 87 87 ...
## $ Mean Humidity : int 68 72 73 82 83 91 96 85 77 75 ...
## $ Min Humidity : int 47 44 54 72 70 82 92 78 67 63 ...
## $ Max Sea Level Pressure [In] : num 30.5 30.4 30.5 30.5 30.2 ...
## $ Mean Sea Level Pressure [In]: num 30.4 30.3 30.4 30.4 30.1 ...
## $ Min Sea Level Pressure [In] : num 30.3 30.2 30.3 30.2 30 ...
## $ Max Visibility [Miles] : int 10 10 10 10 10 10 9 10 10 10 ...
## $ Mean Visibility [Miles] : int 10 10 10 9 8 4 3 6 9 10 ...
## $ Min Visibility [Miles] : int 10 10 10 6 5 0 0 2 5 10 ...
## $ Max Wind Speed [MPH] : int 20 15 13 17 22 16 16 25 25 10 ...
## $ Mean Wind Speed[MPH] : int 9 6 5 7 13 7 7 16 14 5 ...
## $ Max Gust Speed [MPH] : int 23 18 14 23 28 21 21 32 28 12 ...
## $ Precipitation [In] : chr "0" "0" "0" "0" ...
## $ CloudCover : int 0 0 0 1 4 8 8 8 5 0 ...
## $ Events : chr NA NA NA NA ...
## $ WindDir [Degrees] : int 280 312 330 155 178 167 7 338 340 268 ...
## $ Month : Factor w/ 12 levels "December","November",..: 12 12 12 12 12 12 12 12 12 12 ...
# Extract temperature and dew point data
tdData <- lincoln_weather %>%
select(CST, maxT=`Max Temperature [F]`, minT=`Min Temperature [F]`, meanT=`Mean Temperature [F]`,
maxD=`Max Dew Point [F]`, minD=`Min Dewpoint [F]`, meanD=`Mean Dew Point [F]`
) %>%
mutate(date=as.Date(CST))
str(tdData)
## Classes 'tbl_df', 'tbl' and 'data.frame': 366 obs. of 8 variables:
## $ CST : chr "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
## $ maxT : int 37 41 37 30 38 34 33 28 22 31 ...
## $ minT : int 11 5 8 4 19 32 27 22 -4 -9 ...
## $ meanT: int 24 23 23 17 29 33 30 25 9 11 ...
## $ maxD : int 19 22 23 24 29 33 32 25 17 20 ...
## $ minD : int 8 4 8 2 19 29 25 18 -8 -13 ...
## $ meanD: int 13 14 15 13 25 32 30 22 4 5 ...
## $ date : Date, format: "2016-01-01" "2016-01-02" ...
# Plot temperatures by day
tdData %>%
select(date, maxT, meanT, minT) %>%
pivot_longer(-date) %>%
ggplot(aes(x=date, y=value, group=name)) +
geom_line(aes(color=name))
# Plot dew points by day
tdData %>%
select(date, maxD, meanD, minD) %>%
pivot_longer(-date) %>%
ggplot(aes(x=date, y=value, group=name)) +
geom_line(aes(color=name))
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
# Create an XTS for temperature data
tdXTS <- xts(select(tdData, minT, meanT, maxT), order.by=tdData$date)
# Create and plot weekly and monthly averages
tdXTS %>%
apply.weekly(FUN=mean, na.rm=TRUE) %>%
plot(main="Weekly Temperature Average (Lincoln, NE 2016)")
tdXTS %>%
apply.monthly(FUN=mean, na.rm=TRUE) %>%
plot(main="Monthly Temperature Average (Lincoln, NE 2016)")
# Create an XTS for dew-point data
tdXTS <- xts(select(tdData, minD, meanD, maxD), order.by=tdData$date)
# Create and plot weekly and monthly averages
tdXTS %>%
apply.weekly(FUN=mean, na.rm=TRUE) %>%
plot(main="Weekly Dew Point Average (Lincoln, NE 2016)")
tdXTS %>%
apply.monthly(FUN=mean, na.rm=TRUE) %>%
plot(main="Monthly Dew Point Average (Lincoln, NE 2016)")
The xts package is good for working with time series data, while ggplot2 is strong for customizing plots. The packages can be combined in using the weather data.
Example code includes:
# Create an XTS for temperature and dewpoint data
tdXTS <- xts(select(tdData, minT, meanT, maxT, minD, meanD, maxD), order.by=tdData$date)
# Use xts for monthly average and ggplot2 for plotting
basePlot <- tdXTS %>%
apply.monthly(FUN=mean, na.rm=TRUE) %>%
data.frame(date=index(.), row.names=NULL) %>%
ggplot(aes(x=date-lubridate::days(15))) +
geom_ribbon(aes(ymin=minT, ymax=maxT), color="lightblue", fill="lightblue", alpha=0.5) +
geom_line(aes(y=meanT), color="blue", lwd=1) +
labs(x="Month", y="Avg. Temperature (F)", title="Lincoln, NE Weather (2016)",
subtitle="Monthly Avg. Temperature (F)"
)
basePlot
# Add labelling for the three elements
hiMonth <- index(tdXTS %>% apply.monthly(FUN=mean))[3]
loMonth <- index(tdXTS %>% apply.monthly(FUN=mean))[9]
muMonth <- index(tdXTS %>% apply.monthly(FUN=mean))[6]
hiPoint <- c(60, 75)
loPoint <- c(45, 25)
muPoint <- c(72.5, 45)
labFrame <- data.frame(x=c(hiMonth, loMonth, muMonth),
yend=c(hiPoint[1], loPoint[1], muPoint[1]),
y=c(hiPoint[2], loPoint[2], muPoint[2]),
text=c("Avg. Monthly High", "Avg. Monthly Low", "Avg. Monthly Mean")) %>%
mutate(xend=x)
basePlot +
geom_segment(data=labFrame, aes(x=x, y=y, xend=xend, yend=yend), arrow=arrow()) +
geom_text(data=labFrame, aes(x=x, y=y+c(5, -5, -5), label=text), fontface="bold", size=4)
# Can also create and plot a custom rolling average
baseData <- tdXTS %>%
data.frame(date=index(.), row.names=NULL)
base7Day <- rollapply(tdXTS, 7, FUN=mean, na.rm=TRUE) %>%
data.frame(date=index(.), row.names=NULL)
base30Day <- rollapply(tdXTS, 30, FUN=mean, na.rm=TRUE) %>%
data.frame(date=index(.), row.names=NULL)
plotFrame <- bind_rows(baseData, base7Day, base30Day, .id="rolling") %>%
mutate(rollLabel=case_when(rolling==1 ~ "Daily",
rolling==2 ~ "7 Day Rolling",
rolling==3 ~ "30 Day Rolling",
TRUE ~ "ERROR"
)
)
plotFrame %>%
ggplot(aes(x=date)) +
geom_line(aes(y=meanT, color=rollLabel, group=rollLabel), lwd=1) +
labs(x="Month", y="Avg. Temperature (F)", title="Lincoln, NE Weather (2016)",
subtitle="Daily Avg. Temperature (F)"
)
## Warning: Removed 35 rows containing missing values (geom_path).
Humidity data are also available in the lincoln_weather dataset. There is a relationship between temperature, dewpoint, and humidity.
Example code includes:
htdData <- lincoln_weather %>%
select(CST, meanT=`Mean Temperature [F]`, meanD=`Mean Dew Point [F]`, meanH=`Mean Humidity`) %>%
mutate(date=as.Date(CST), month=lubridate::month(date))
str(htdData)
## Classes 'tbl_df', 'tbl' and 'data.frame': 366 obs. of 6 variables:
## $ CST : chr "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
## $ meanT: int 24 23 23 17 29 33 30 25 9 11 ...
## $ meanD: int 13 14 15 13 25 32 30 22 4 5 ...
## $ meanH: int 68 72 73 82 83 91 96 85 77 75 ...
## $ date : Date, format: "2016-01-01" "2016-01-02" ...
## $ month: num 1 1 1 1 1 1 1 1 1 1 ...
# Histogram for average humidity
htdData %>%
ggplot(aes(x=meanH)) +
geom_histogram() +
labs(title="Mean Humidity Histogram", subtitle="Lincoln, NE (2016)", x="Mean Humidity (%)", y="Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).
htdData %>%
filter((meanH < 25) | is.na(meanH))
## # A tibble: 2 x 6
## CST meanT meanD meanH date month
## <chr> <int> <int> <int> <date> <dbl>
## 1 2016-2-27 50 21 0 2016-02-27 2
## 2 2016-2-28 44 NA NA 2016-02-28 2
htdData <- htdData %>%
filter(!((meanH < 25) | is.na(meanH)))
# Updated Histogram for average humidity
htdData %>%
ggplot(aes(x=meanH)) +
geom_histogram() +
labs(title="Mean Humidity Histogram", subtitle="Lincoln, NE (2016)", x="Mean Humidity (%)", y="Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram for dewpoint depression (T - D)
htdData %>%
ggplot(aes(x=meanT-meanD)) +
geom_histogram() +
labs(title="Mean Dewpoint Depression Histogram", subtitle="Lincoln, NE (2016)",
x="Mean Dewpoint Depression (F)", y="Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
htdData %>%
filter(meanD >= meanT)
## # A tibble: 2 x 6
## CST meanT meanD meanH date month
## <chr> <int> <int> <int> <date> <dbl>
## 1 2016-1-7 30 30 96 2016-01-07 1
## 2 2016-11-27 37 39 85 2016-11-27 11
htdData <- htdData %>%
filter(meanT >= meanD)
# Updated Histogram for dewpoint depression (T - D)
htdData %>%
ggplot(aes(x=meanT-meanD, y=..density..)) +
geom_histogram(binwidth=1) +
geom_density(color="red") +
labs(title="Mean Dewpoint Depression Histogram", subtitle="Lincoln, NE (2016)",
x="Mean Dewpoint Depression (F)", y="Proportion")
# Average humidity by month
htdData %>%
group_by(month) %>%
summarize(meanH=mean(meanH, na.rm=TRUE)) %>%
ggplot(aes(x=as.factor(month), y=meanH)) +
geom_col() +
labs(title="Average Humidity by Month", subtitle="Lincoln, NE (2016)", x="Month", y="Mean Humidity (%)")
# Relationship between temperature and dewpoint
htdData %>%
ggplot(aes(x=meanD, y=meanT)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
labs(title="Daily Averages", subtitle="Lincoln, NE (2016)",
x="Mean Dewpoint (F)", y="Mean Temperature (F)"
)
# Relationship between dewpoint depression and humidity
htdData %>%
mutate(dpD=meanT-meanD) %>%
ggplot(aes(x=dpD, y=meanH)) +
geom_point() +
labs(title="Daily Averages", subtitle="Lincoln, NE (2016)",
x="Mean Dewpoint Depression (F)", y="Mean Humidity (%)"
)
# Relationship between temperature and dewpoint and humidity
humInts <- c(0, 50, 60, 70, 80)
humLabel <- sapply(1:(length(humInts)-1), FUN=function(x) { paste0(humInts[x], "-", humInts[x+1]) })
humLabel <- c(humLabel, paste0(humInts[length(humInts)], "+"))
htdData %>%
mutate(humBin=factor(findInterval(meanH, humInts), levels=1:length(humInts), labels=humLabel)) %>%
ggplot(aes(x=meanD, y=meanT, color=humBin)) +
geom_point() +
geom_smooth(se=FALSE) +
geom_abline(slope=1, intercept=0) +
labs(title="Daily Averages", subtitle="Lincoln, NE (2016)",
x="Mean Dewpoint (F)", y="Mean Temperature (F)"
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Expressed using dewpoint depression vs. dewpoint
htdData %>%
mutate(humBin=factor(findInterval(meanH, humInts), levels=1:length(humInts), labels=humLabel)) %>%
ggplot(aes(x=meanD, y=meanT-meanD, color=humBin)) +
geom_point() +
geom_smooth() +
labs(title="Daily Averages", subtitle="Lincoln, NE (2016)",
x="Mean Dewpoint (F)", y="Dewpoint Depression (F)"
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Expressed using dewpoint depression vs. temperature
htdData %>%
mutate(humBin=factor(findInterval(meanH, humInts), levels=1:length(humInts), labels=humLabel)) %>%
ggplot(aes(x=meanT, y=meanT-meanD, color=humBin)) +
geom_point() +
geom_smooth(se=FALSE, method="lm") +
labs(title="Daily Averages", subtitle="Lincoln, NE (2016)",
x="Mean Temperature (F)", y="Dewpoint Depression (F)"
)
# Linear regression for temperature, dewpoint, and humidity
htdReg <- htdData %>%
mutate(dpD=meanT-meanD) %>%
lm(meanH ~ meanT + dpD, data=.)
summary(htdReg)
##
## Call:
## lm(formula = meanH ~ meanT + dpD, data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.3569 -2.5472 -0.1894 2.7285 14.4934
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 82.89890 0.83037 99.833 < 2e-16 ***
## meanT 0.09771 0.01403 6.965 1.57e-11 ***
## dpD -1.79530 0.04811 -37.317 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.869 on 360 degrees of freedom
## Multiple R-squared: 0.796, Adjusted R-squared: 0.7949
## F-statistic: 702.4 on 2 and 360 DF, p-value: < 2.2e-16
htdData %>%
mutate(dpD=meanT-meanD) %>%
mutate(predH=predict(htdReg, newdata=.)) %>%
ggplot(aes(x=predH, y=meanH)) +
geom_point() +
geom_abline(slope=1, intercept=0) +
labs(title="Daily Averages", subtitle="Lincoln, NE (2016)",
x="Predicted Humidity (%)", y="Actual Humidity (%)"
)
Wind data (speed, gust, direction) are also available in the lincoln_weather dataset..
Example code includes:
# Extract wind data
wdData <- lincoln_weather %>%
select(CST, maxW=`Max Wind Speed [MPH]`, maxG=`Max Gust Speed [MPH]`, meanW=`Mean Wind Speed[MPH]`,
dirW=`WindDir [Degrees]`
) %>%
mutate(date=as.Date(CST))
str(wdData)
## Classes 'tbl_df', 'tbl' and 'data.frame': 366 obs. of 6 variables:
## $ CST : chr "2016-1-1" "2016-1-2" "2016-1-3" "2016-1-4" ...
## $ maxW : int 20 15 13 17 22 16 16 25 25 10 ...
## $ maxG : int 23 18 14 23 28 21 21 32 28 12 ...
## $ meanW: int 9 6 5 7 13 7 7 16 14 5 ...
## $ dirW : int 280 312 330 155 178 167 7 338 340 268 ...
## $ date : Date, format: "2016-01-01" "2016-01-02" ...
# Manage missing data
wdData[!complete.cases(wdData), ]
## # A tibble: 6 x 6
## CST maxW maxG meanW dirW date
## <chr> <int> <int> <int> <int> <date>
## 1 2016-2-6 18 NA 5 241 2016-02-06
## 2 2016-2-28 36 45 18 NA 2016-02-28
## 3 2016-5-31 17 NA 11 319 2016-05-31
## 4 2016-6-18 NA NA NA -1 2016-06-18
## 5 2016-6-19 NA NA NA -1 2016-06-19
## 6 2016-12-19 17 NA 9 209 2016-12-19
wdData <- wdData %>%
filter(dirW != -1, !is.na(dirW)) %>%
mutate(maxG=ifelse(is.na(maxG), maxW, maxG))
summary(wdData)
## CST maxW maxG meanW
## Length:363 Min. : 8.00 Min. : 10.00 Min. : 0.00
## Class :character 1st Qu.: 16.00 1st Qu.: 21.00 1st Qu.: 6.00
## Mode :character Median : 21.00 Median : 26.00 Median : 8.00
## Mean : 21.71 Mean : 27.93 Mean : 9.11
## 3rd Qu.: 25.00 3rd Qu.: 33.00 3rd Qu.:11.00
## Max. :131.00 Max. :143.00 Max. :27.00
## dirW date
## Min. : 1.0 Min. :2016-01-01
## 1st Qu.:143.5 1st Qu.:2016-04-01
## Median :193.0 Median :2016-07-03
## Mean :209.8 Mean :2016-07-01
## 3rd Qu.:310.0 3rd Qu.:2016-10-01
## Max. :358.0 Max. :2016-12-31
# Manage very high wind data
wdData[wdData$maxG >= 60, ]
## # A tibble: 3 x 6
## CST maxW maxG meanW dirW date
## <chr> <int> <int> <int> <int> <date>
## 1 2016-3-23 45 61 25 20 2016-03-23
## 2 2016-6-17 131 143 20 170 2016-06-17
## 3 2016-12-25 51 64 19 139 2016-12-25
wdData <- wdData %>%
filter(maxG <= 80)
summary(wdData)
## CST maxW maxG meanW
## Length:362 Min. : 8.0 Min. :10.00 Min. : 0.00
## Class :character 1st Qu.:16.0 1st Qu.:21.00 1st Qu.: 6.00
## Mode :character Median :21.0 Median :26.00 Median : 8.00
## Mean :21.4 Mean :27.61 Mean : 9.08
## 3rd Qu.:25.0 3rd Qu.:33.00 3rd Qu.:11.00
## Max. :51.0 Max. :64.00 Max. :27.00
## dirW date
## Min. : 1.0 Min. :2016-01-01
## 1st Qu.:143.2 1st Qu.:2016-04-01
## Median :193.0 Median :2016-07-03
## Mean :209.9 Mean :2016-07-01
## 3rd Qu.:310.0 3rd Qu.:2016-10-01
## Max. :358.0 Max. :2016-12-31
# Density of wind speeds
wdData %>%
select(date, meanW, maxW, maxG) %>%
pivot_longer(-date) %>%
ggplot(aes(x=value, fill=name)) +
geom_density(alpha=0.5) +
scale_fill_discrete(name="Wind Speed [MPH]", labels=c("Max Gust", "Max", "Mean")) +
labs(title="Lincoln, NE (2016) Wind Speeds", y="Density", x="Wind Speed [MPH]")
# Density of wind direction
wdData %>%
select(date, dirW) %>%
ggplot(aes(x=dirW)) +
geom_density(alpha=0.5, fill="blue") +
labs(title="Winds are mainly from the S and NW", subtitle="Lincoln, NE (2016)",
y="Density", x="Wind Direction"
)
# Wind speed and direction
wdData %>%
ggplot(aes(x=meanW, y=dirW)) +
geom_point(alpha=0.25) +
coord_polar(theta="y") +
labs(title="Lincoln, NE (2016)", subtitle="Direction vs. Mean Wind Speed", x="Mean Wind Speed [MPH]") +
scale_y_continuous(limits=c(0, 360), breaks=c(0, 90, 180, 270, 360)) +
scale_x_continuous(limits=c(0, 30), breaks=c(0, 5, 10, 15, 20, 25, 30)) +
geom_point(aes(x=0, y=0), color="red", size=2)
# Wind speed and direction as factors
windDirs <- c("N", "NE", "E", "SE", "S", "SW", "W", "NW")
windSpeeds <- c(0, 5, 10, 15)
windLabels <- sapply(1:(length(windSpeeds)-1), FUN=function(x){ paste0(windSpeeds[x], "-", windSpeeds[x+1]) })
windLabels <- c(windLabels, paste0(windSpeeds[length(windSpeeds)], "+"))
wdData <- wdData %>%
mutate(wd=factor(floor(((dirW+22.5)/45) %% 8), levels=0:7, labels=windDirs),
ws=factor(findInterval(meanW, windSpeeds), levels=length(windSpeeds):1, labels=rev(windLabels))
)
# Summary of interaction between wind speed and wind direction
wdData %>%
group_by(wd) %>%
summarize(n=n(), avgMean=mean(meanW), avgMax=mean(maxW), avgGust=mean(maxG))
## # A tibble: 8 x 5
## wd n avgMean avgMax avgGust
## <fct> <int> <dbl> <dbl> <dbl>
## 1 N 51 10.1 21.4 27.7
## 2 NE 18 8.17 19.4 24.8
## 3 E 16 7.44 18.9 24.6
## 4 SE 69 8.01 19.6 25.5
## 5 S 73 8.89 20.6 26.8
## 6 SW 26 6.96 20.3 26.2
## 7 W 29 8.10 20.4 26.5
## 8 NW 80 11.1 25.4 32.2
table(wdData$wd, wdData$ws)
##
## 15+ 10-15 5-10 0-5
## N 8 14 25 4
## NE 1 7 6 4
## E 0 3 12 1
## SE 3 15 42 9
## S 5 23 43 2
## SW 0 6 15 5
## W 3 4 18 4
## NW 17 31 28 4
# Graph of wind speed and wind direction
wdData %>%
ggplot(aes(x=wd, fill=ws)) +
geom_bar() +
scale_fill_discrete(name="Wind Speed [MPH]") +
labs(title="Lincoln, NE (2016) Wind Speeds and Directions", y="# Days", x="Wind Direction")
# Graph of wind speed and wind direction (polar coordinates)
wdData %>%
ggplot(aes(x=wd, fill=ws)) +
geom_bar() +
scale_fill_discrete(name="Wind Speed [MPH]") +
labs(title="Lincoln, NE (2016) Wind Speeds and Directions", y="# Days", x="Wind Direction") +
coord_polar(start=-0.4)
Iowa State has a great database of archived weather data, including the historical METAR data (meteorological aerodrome report) for a number of reporting stations.
METAR include information on visibility, wind, temperature, dew point, precipitation, clouds, barometric pressure, and other features that may impact safe aviation.
The data for station KLNK (Lincoln, NE airport) was saved as a CSV from Iowa State
Some processing is required before using the METAR data:
Example code includes:
# Load METAR data
klnk <- readr::read_csv("./RInputFiles/metar_klnk_2016.txt", na=c("", "NA", "M"))
## Parsed with column specification:
## cols(
## .default = col_double(),
## station = col_character(),
## valid = col_datetime(format = ""),
## p01i = col_character(),
## skyc1 = col_character(),
## skyc2 = col_character(),
## skyc3 = col_character(),
## skyc4 = col_logical(),
## skyl4 = col_logical(),
## wxcodes = col_character(),
## ice_accretion_1hr = col_character(),
## ice_accretion_3hr = col_character(),
## ice_accretion_6hr = col_character(),
## peak_wind_time = col_datetime(format = ""),
## metar = col_character()
## )
## See spec(...) for full column specifications.
str(klnk, give.attr=FALSE)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10594 obs. of 29 variables:
## $ station : chr "LNK" "LNK" "LNK" "LNK" ...
## $ valid : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
## $ tmpf : num 27 26.1 27 27 27 ...
## $ dwpf : num 19.9 19.9 19.9 21 19.9 ...
## $ relh : num 74.5 77.3 74.5 78 74.5 ...
## $ drct : num 300 0 0 280 310 10 0 10 20 0 ...
## $ sknt : num 5 0 0 3 5 9 0 3 3 0 ...
## $ p01i : chr "0.00" "0.00" "0.00" "0.00" ...
## $ alti : num 30.3 30.3 30.3 30.3 30.3 ...
## $ mslp : num 1028 1028 1028 1028 1029 ...
## $ vsby : num 10 10 10 10 10 10 10 10 10 10 ...
## $ gust : num NA NA NA NA NA NA NA NA NA NA ...
## $ skyc1 : chr "OVC" "OVC" "OVC" "OVC" ...
## $ skyc2 : chr NA NA NA NA ...
## $ skyc3 : chr NA NA NA NA ...
## $ skyc4 : logi NA NA NA NA NA NA ...
## $ skyl1 : num 2800 2700 2600 2700 2100 2700 2700 2700 2600 2600 ...
## $ skyl2 : num NA NA NA NA 2700 NA NA NA NA NA ...
## $ skyl3 : num NA NA NA NA NA NA NA NA NA NA ...
## $ skyl4 : logi NA NA NA NA NA NA ...
## $ wxcodes : chr NA NA NA NA ...
## $ ice_accretion_1hr: chr NA NA NA NA ...
## $ ice_accretion_3hr: chr NA NA NA NA ...
## $ ice_accretion_6hr: chr NA NA NA NA ...
## $ peak_wind_gust : num NA NA NA NA NA NA NA NA NA NA ...
## $ peak_wind_drct : num NA NA NA NA NA NA NA NA NA NA ...
## $ peak_wind_time : POSIXct, format: NA NA ...
## $ feel : num 20.4 26.1 27 22.9 20.4 ...
## $ metar : chr "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
# Filter to only data that ends with times ending in 54Z
metarKLNK <- klnk %>%
filter(str_detect(metar, "54Z"))
dim(metarKLNK)
## [1] 8813 29
# There should be 24*368=8832 records, so there are a handful (19) of missing METAR observations
minDate <- min(metarKLNK$valid)
expDate <- minDate + lubridate::hours(0:(24*368 - 1))
# Observations expected but not recorded
as.POSIXct(setdiff(expDate, metarKLNK$valid), origin="1970-01-01", tz="UTC")
## [1] "2016-01-19 11:54:00 UTC" "2016-05-06 11:54:00 UTC"
## [3] "2016-05-06 12:54:00 UTC" "2016-06-17 23:54:00 UTC"
## [5] "2016-06-18 00:54:00 UTC" "2016-06-18 07:54:00 UTC"
## [7] "2016-07-02 15:54:00 UTC" "2016-07-13 14:54:00 UTC"
## [9] "2016-07-13 15:54:00 UTC" "2016-07-13 16:54:00 UTC"
## [11] "2016-07-13 17:54:00 UTC" "2016-07-30 13:54:00 UTC"
## [13] "2016-08-02 07:54:00 UTC" "2016-08-05 07:54:00 UTC"
## [15] "2016-08-29 21:54:00 UTC" "2016-09-15 16:54:00 UTC"
## [17] "2016-09-16 05:54:00 UTC" "2016-11-21 00:54:00 UTC"
## [19] "2016-12-03 08:54:00 UTC"
# Observations recorded but not expected
setdiff(metarKLNK$valid, expDate)
## numeric(0)
# Confirmation of uniqueness
length(unique(metarKLNK$valid)) == length(metarKLNK$valid)
## [1] TRUE
# Extract wind speeds and direction
# The general wind format is dddssGssKT where ddd is the direction (VRB meaning variable), the main ss is the speed, and the Gss is the gust speed (optional and not always displayed)
mtxWind <- metarKLNK %>%
pull(metar) %>%
str_match(pattern="(\\d{3}|VRB)(\\d{2})(G\\d{2})?KT")
head(mtxWind)
## [,1] [,2] [,3] [,4]
## [1,] "30005KT" "300" "05" NA
## [2,] "00000KT" "000" "00" NA
## [3,] "00000KT" "000" "00" NA
## [4,] "28003KT" "280" "03" NA
## [5,] "31005KT" "310" "05" NA
## [6,] "01009KT" "010" "09" NA
table(mtxWind[, 2], useNA="ifany")
##
## 000 010 020 030 040 050 060 070 080 090 100 110 120 130 140 150
## 875 269 199 146 135 121 108 95 88 65 102 158 169 245 241 339
## 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 310
## 463 565 517 413 284 179 142 96 73 80 105 89 121 141 147 225
## 320 330 340 350 360 VRB <NA>
## 234 303 352 413 383 114 19
table(mtxWind[, 3], useNA="ifany")
##
## 00 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17
## 875 615 704 664 696 651 636 599 536 451 439 371 315 276 235 173
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 34
## 156 108 69 62 45 33 23 13 14 9 10 6 6 2 1 1
## <NA>
## 19
table(mtxWind[, 4], useNA="ifany")
##
## G14 G15 G16 G17 G18 G19 G20 G21 G22 G23 G24 G25 G26 G27 G28 G29
## 6 11 11 16 30 59 69 87 83 107 101 83 62 61 61 37
## G30 G31 G32 G33 G34 G35 G36 G37 G38 G39 G40 G41 G42 G43 G45 <NA>
## 28 24 18 13 12 14 6 6 10 11 2 3 1 2 2 7777
# Verify that winds not captured are in fact missing from the METAR
metarKLNK[which(is.na(mtxWind[, 2])), "metar"]
## # A tibble: 19 x 1
## metar
## <chr>
## 1 KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028
## 2 KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083
## 3 KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013
## 4 KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006~
## 5 KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $
## 6 KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007
## 7 KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58~
## 8 KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $
## 9 KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106
## 10 KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200
## 11 KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $
## 12 KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222
## 13 KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001
## 14 KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010
## 15 KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183
## 16 KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100
## 17 KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $
## 18 KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067
## 19 KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050
metarKLNK <- metarKLNK %>%
mutate(dirW=mtxWind[, 2],
spdW=as.numeric(mtxWind[, 3]),
gustW=as.numeric(str_replace(mtxWind[, 4], "G", ""))
)
# Plot for the wind direction
metarKLNK %>%
ggplot(aes(x=dirW)) +
geom_bar() +
labs(title="Lincoln, NE Wind Direction", subtitle="KLNK METAR (2016)",
y="# Hourly Observations", x="Wind Direction"
)
# Plot for the minimum, average, and maximum wind speed by wind direction
# Wind direction 000 is reserved for 0 KT wind, while VRB is reserved for 3-6 KT variable winds
metarKLNK %>%
filter(!is.na(dirW)) %>%
group_by(dirW) %>%
summarize(minWind=min(spdW), meanWind=mean(spdW), maxWind=max(spdW)) %>%
ggplot(aes(x=dirW)) +
geom_point(aes(y=meanWind), color="red", size=2) +
geom_errorbar(aes(ymin=minWind, ymax=maxWind)) +
labs(title="Lincoln, NE Wind Direction", subtitle="KLNK METAR (2016)",
y="Wind Speed [KT]", x="Wind Direction"
)
# Plot for the wind speed
# Roughly 10% of the time, there is no wind in Lincoln
metarKLNK %>%
ggplot(aes(x=spdW)) +
geom_bar(aes(y=..count../sum(..count..))) +
labs(title="Roughly 10% of wind speeds in Lincoln, NE measure 0 Knots", subtitle="KLNK METAR (2016)",
y="% Hourly Observations", x="Wind Speed {KT]"
)
## Warning: Removed 19 rows containing non-finite values (stat_count).
metarKLNK %>%
filter(!is.na(dirW), dirW != "VRB", dirW != "000") %>%
mutate(dirW=as.numeric(dirW)) %>%
group_by(dirW, spdW) %>%
summarize(n=n()) %>%
ggplot(aes(x=spdW, y=dirW)) +
geom_point(alpha=0.1, aes(size=n)) +
coord_polar(theta="y") +
labs(title="Lincoln, NE (2016)", subtitle="Direction vs. Wind Speed", x="Wind Speed [KT]") +
scale_y_continuous(limits=c(0, 360), breaks=c(0, 90, 180, 270, 360)) +
scale_x_continuous(limits=c(0, 30), breaks=c(0, 5, 10, 15, 20, 25, 30)) +
geom_point(aes(x=0, y=0), color="red", size=2)
## Warning: Removed 4 rows containing missing values (geom_point).
A properly formatted METAR includes the following information in order, though with variable amounts of other information in between.
dddd54Z ddddd[Gdd]KT dSM [M]dd/[M]dd Adddd RMK SLPddd Tdddddddd
Example code includes:
metAll <- metarKLNK %>%
pull(metar)
# Create a search string for METAR
valMet <- "54Z.*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})"
# Find the number of matching elements
str_detect(metAll, pattern=valMet) %>% table()
## .
## FALSE TRUE
## 23 8790
# The strings that do not match have errors in the raw data (typically, missing wind speed)
metAll[!str_detect(metAll, pattern=valMet)]
## [1] "KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028"
## [2] "KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083"
## [3] "KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013"
## [4] "KLNK 221654Z 19007KT CLR 15/03 A2956 RMK AO2 SLP006 T01500033 $"
## [5] "KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006 $"
## [6] "KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $"
## [7] "KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007"
## [8] "KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58011"
## [9] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"
## [10] "KLNK 011554Z 32007KT CLR 22/09 A3008 RMK AO2 SLP179 T02170089 $"
## [11] "KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106"
## [12] "KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200"
## [13] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"
## [14] "KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $"
## [15] "KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222"
## [16] "KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001"
## [17] "KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010"
## [18] "KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183"
## [19] "KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100"
## [20] "KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $"
## [21] "KLNK 201554Z 15014G19KT CLR 27/22 A3007 RMK AO2 SLP174 T02670217 $"
## [22] "KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067"
## [23] "KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050"
# A matrix of string matches can be obtained
mtxParse <- str_match(metAll, pattern=valMet)
head(mtxParse)
## [,1]
## [1,] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067"
## [2,] "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067"
## [3,] "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067"
## [4,] "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061"
## [5,] "54Z 31005KT 10SM SCT021 OVC027 M03/M07 A3033 RMK AO2 SLP286 T10281067"
## [6,] "54Z AUTO 01009KT 10SM OVC027 M06/M10 A3033 RMK AO2 SLP289 T10611100"
## [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
## [1,] "300" "05" NA " " "10SM" "M03" "M07" "A3029" "SLP275" "T10281067"
## [2,] "000" "00" NA " " "10SM" "M03" "M07" "A3030" "SLP277" "T10331067"
## [3,] "000" "00" NA " " "10SM" "M03" "M07" "A3030" "SLP277" "T10281067"
## [4,] "280" "03" NA " " "10SM" "M03" "M06" "A3031" "SLP281" "T10281061"
## [5,] "310" "05" NA " " "10SM" "M03" "M07" "A3033" "SLP286" "T10281067"
## [6,] "010" "09" NA " " "10SM" "M06" "M10" "A3033" "SLP289" "T10611100"
# Create a data frame
dfParse <- data.frame(mtxParse, stringsAsFactors=FALSE)
names(dfParse) <- c("METAR", "WindDir", "WindSpeed", "WindGust", "Dummy", "Visibility",
"TempC", "DewC", "Altimeter", "SLP", "FahrC"
)
dfParse <- tibble::as_tibble(dfParse)
str(dfParse)
## Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 11 variables:
## $ METAR : chr "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## $ WindDir : chr "300" "000" "000" "280" ...
## $ WindSpeed : chr "05" "00" "00" "03" ...
## $ WindGust : chr NA NA NA NA ...
## $ Dummy : chr " " " " " " " " ...
## $ Visibility: chr "10SM" "10SM" "10SM" "10SM" ...
## $ TempC : chr "M03" "M03" "M03" "M03" ...
## $ DewC : chr "M07" "M07" "M07" "M06" ...
## $ Altimeter : chr "A3029" "A3030" "A3030" "A3031" ...
## $ SLP : chr "SLP275" "SLP277" "SLP277" "SLP281" ...
## $ FahrC : chr "T10281067" "T10331067" "T10281067" "T10281061" ...
# Convert to numeric where appropriate
dfParse <- dfParse %>%
mutate(WindSpeed = as.integer(WindSpeed),
WindGust = as.numeric(WindGust),
Visibility = as.numeric(str_replace(Visibility, "SM", "")),
TempC = as.integer(str_replace(TempC, "M", "-")),
DewC = as.integer(str_replace(DewC, "M", "-")),
Altimeter = as.integer(str_replace(Altimeter, "A", "")),
SLP = as.integer(str_replace(SLP, "SLP", "")),
TempF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 2, 5), pattern="^1", "-"))/10,
DewF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 6, 9), pattern="^1", "-"))/10
)
## Warning: NAs introduced by coercion
# Investigate the data
set.seed(2003211416)
str(dfParse)
## Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 13 variables:
## $ METAR : chr "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## $ WindDir : chr "300" "000" "000" "280" ...
## $ WindSpeed : int 5 0 0 3 5 9 0 3 0 0 ...
## $ WindGust : num NA NA NA NA NA NA NA NA NA NA ...
## $ Dummy : chr " " " " " " " " ...
## $ Visibility: num 10 10 10 10 10 10 10 10 10 10 ...
## $ TempC : int -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
## $ DewC : int -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
## $ Altimeter : int 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
## $ SLP : int 275 277 277 281 286 289 290 291 295 295 ...
## $ FahrC : chr "T10281067" "T10331067" "T10281067" "T10281061" ...
## $ TempF : num 27 26.1 27 27 27 ...
## $ DewF : num 19.9 19.9 19.9 21 19.9 ...
head(dfParse)
## # A tibble: 6 x 13
## METAR WindDir WindSpeed WindGust Dummy Visibility TempC DewC Altimeter SLP
## <chr> <chr> <int> <dbl> <chr> <dbl> <int> <int> <int> <int>
## 1 54Z ~ 300 5 NA " " 10 -3 -7 3029 275
## 2 54Z ~ 000 0 NA " " 10 -3 -7 3030 277
## 3 54Z ~ 000 0 NA " " 10 -3 -7 3030 277
## 4 54Z ~ 280 3 NA " " 10 -3 -6 3031 281
## 5 54Z ~ 310 5 NA " " 10 -3 -7 3033 286
## 6 54Z ~ 010 9 NA " " 10 -6 -10 3033 289
## # ... with 3 more variables: FahrC <chr>, TempF <dbl>, DewF <dbl>
tail(dfParse)
## # A tibble: 6 x 13
## METAR WindDir WindSpeed WindGust Dummy Visibility TempC DewC Altimeter SLP
## <chr> <chr> <int> <dbl> <chr> <dbl> <int> <int> <int> <int>
## 1 54Z ~ 160 6 NA " " 10 2 -4 2993 147
## 2 54Z ~ VRB 4 NA " " 10 4 -4 2990 139
## 3 54Z ~ 130 7 NA " " 10 4 -7 2989 134
## 4 54Z ~ 110 7 NA " " 10 4 -7 2988 130
## 5 54Z ~ 100 10 NA " " 10 3 -5 2986 125
## 6 54Z ~ 100 10 NA " " 10 3 -6 2986 123
## # ... with 3 more variables: FahrC <chr>, TempF <dbl>, DewF <dbl>
dfParse %>%
sample_n(20)
## # A tibble: 20 x 13
## METAR WindDir WindSpeed WindGust Dummy Visibility TempC DewC Altimeter SLP
## <chr> <chr> <int> <dbl> <chr> <dbl> <int> <int> <int> <int>
## 1 54Z ~ 360 7 NA " " 10 21 15 2995 130
## 2 54Z ~ 210 13 NA " " 10 31 4 2988 108
## 3 54Z ~ 340 11 NA " " 10 -7 -13 3025 259
## 4 54Z ~ 240 9 NA " " 10 13 8 2961 18
## 5 54Z ~ 310 6 NA " " 10 -7 -19 3044 329
## 6 54Z ~ 040 7 NA " " 10 26 14 3010 179
## 7 54Z ~ 170 17 NA " " 10 31 22 2983 90
## 8 54Z ~ 160 7 NA " " 10 -1 -4 3002 182
## 9 54Z ~ 310 9 NA " " 10 13 9 3021 226
## 10 54Z ~ 110 10 NA " " 10 16 16 2990 116
## 11 54Z ~ 140 5 NA " " 10 19 18 3013 199
## 12 54Z ~ 110 6 NA " " 10 20 4 3012 197
## 13 54Z ~ 220 4 NA " " 10 3 -4 3042 311
## 14 54Z ~ 350 3 NA " " 8 9 8 3011 194
## 15 54Z ~ 080 8 NA " " 10 -12 -18 3046 340
## 16 54Z ~ 320 7 NA " " 10 16 11 2998 144
## 17 54Z ~ 330 6 NA " " 10 9 5 3016 214
## 18 54Z ~ 340 13 NA " " 3 0 -1 2979 99
## 19 54Z ~ 190 5 NA " " 10 3 -4 3024 251
## 20 54Z ~ 320 18 NA " " 10 24 12 2999 147
## # ... with 3 more variables: FahrC <chr>, TempF <dbl>, DewF <dbl>
# Check for NA values
colSums(is.na(dfParse))
## METAR WindDir WindSpeed WindGust Dummy Visibility TempC
## 23 23 23 8813 23 23 23
## DewC Altimeter SLP FahrC TempF DewF
## 23 23 23 23 23 23
# Plot of counts by key metric
keyMetric <- c("WindDir", "WindSpeed", "WindGust", "Visibility", "TempC",
"DewC", "Altimeter", "SLP", "TempF", "DewF"
)
for (x in keyMetric) {
p <- dfParse %>%
group_by_at(vars(all_of(x))) %>%
summarize(n=n()) %>%
ggplot(aes_string(x=x, y="n")) +
geom_col() +
labs(title=x, y="Count")
print(p)
}
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
# There are three obvious issues
# Visibility is not correctly picked up when there is a / such as 1/2 SM
# Wind gusts are never picked up
# Sea Level Pressure is missing a digit
# Correct for visibility
# Areas that have \\d \\d/\\dSM
sm1 <- which(str_detect(metAll, pattern=" \\d/\\dSM"))
sm2 <- which(str_detect(metAll, pattern=" \\d \\d/\\dSM"))
valSM1 <- str_match(metAll, pattern="\\d/\\dSM")[sm1]
valSM1 <- str_replace(valSM1, "SM", "")
valSM1 <- as.integer(str_sub(valSM1, 1, 1)) / as.integer(str_sub(valSM1, 3, 3))
valSM2 <- str_match(metAll, pattern=" \\d \\d/\\dSM")[sm2]
valSM2 <- as.integer(str_sub(valSM2, 2, 2))
dfParse[sm1, "Visibility"] <- valSM1
dfParse[sm2, "Visibility"] <- dfParse[sm2, "Visibility"] + valSM2
dfParse %>%
count(Visibility)
## # A tibble: 18 x 2
## Visibility n
## <dbl> <int>
## 1 0.25 20
## 2 0.5 16
## 3 0.75 15
## 4 1 19
## 5 1.25 13
## 6 1.5 17
## 7 1.75 21
## 8 2 50
## 9 2.5 38
## 10 3 70
## 11 4 108
## 12 5 108
## 13 6 146
## 14 7 189
## 15 8 221
## 16 9 290
## 17 10 7449
## 18 NA 23
# Correct for wind gusts
gustCheck <- which(str_detect(metAll, pattern="\\d{5}G\\d{2}KT"))
valGust <- str_match(metAll, pattern="\\d{5}G\\d{2}KT")[gustCheck]
valGust <- as.integer(str_sub(valGust, 7, 8))
dfParse[gustCheck, "WindGust"] <- valGust
dfParse %>%
count(WindGust) %>%
as.data.frame
## WindGust n
## 1 14 4
## 2 15 11
## 3 16 11
## 4 17 16
## 5 18 29
## 6 19 59
## 7 20 69
## 8 21 87
## 9 22 83
## 10 23 107
## 11 24 101
## 12 25 83
## 13 26 62
## 14 27 61
## 15 28 61
## 16 29 37
## 17 30 28
## 18 31 24
## 19 32 18
## 20 33 13
## 21 34 12
## 22 35 14
## 23 36 6
## 24 37 6
## 25 38 10
## 26 39 11
## 27 40 2
## 28 41 3
## 29 42 1
## 30 43 2
## 31 45 2
## 32 NA 7780
# Correct for SLP
dfParse <- dfParse %>%
mutate(modSLP=ifelse(dfParse$SLP < 500, 1000 + dfParse$SLP/10, 900 + dfParse$SLP/10))
dfParse %>%
group_by(SLP, modSLP) %>%
summarize(n=n()) %>%
ggplot(aes(x=SLP, y=modSLP, size=n)) +
geom_point(alpha=0.3)
## Warning: Removed 1 rows containing missing values (geom_point).
# Check updated plots
keyMetric <- c("WindGust", "Visibility", "modSLP")
for (x in keyMetric) {
p <- dfParse %>%
group_by_at(vars(all_of(x))) %>%
summarize(n=n()) %>%
ggplot(aes_string(x=x, y="n")) +
geom_col() +
labs(title=x, y="Count")
print(p)
}
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
Many of the METAR variables are correlated/associated to one another.
Example code includes:
# Define key numeric variables
coreNum <- c("TempC", "TempF", "DewC", "DewF", "Altimeter", "modSLP", "WindSpeed", "Visibility")
# Add the date back to the file (should edit the above instead)
dfParse <- dfParse %>%
mutate(month=lubridate::month(metarKLNK$valid))
str(dfParse)
## Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 15 variables:
## $ METAR : chr "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## $ WindDir : chr "300" "000" "000" "280" ...
## $ WindSpeed : int 5 0 0 3 5 9 0 3 0 0 ...
## $ WindGust : num NA NA NA NA NA NA NA NA NA NA ...
## $ Dummy : chr " " " " " " " " ...
## $ Visibility: num 10 10 10 10 10 10 10 10 10 10 ...
## $ TempC : int -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
## $ DewC : int -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
## $ Altimeter : int 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
## $ SLP : int 275 277 277 281 286 289 290 291 295 295 ...
## $ FahrC : chr "T10281067" "T10331067" "T10281067" "T10281061" ...
## $ TempF : num 27 26.1 27 27 27 ...
## $ DewF : num 19.9 19.9 19.9 21 19.9 ...
## $ modSLP : num 1028 1028 1028 1028 1029 ...
## $ month : num 12 12 12 12 12 12 12 12 12 12 ...
# Keep only complete cases and find correlations
mtxCorr <- dfParse %>%
mutate(month=lubridate::month(metarKLNK$valid)) %>%
select_at(vars(all_of(coreNum))) %>%
filter(complete.cases(.)) %>%
cor()
# Print the correlations and show a heatmap
mtxCorr %>%
round(2)
## TempC TempF DewC DewF Altimeter modSLP WindSpeed Visibility
## TempC 1.00 1.00 0.91 0.91 -0.38 -0.48 0.13 0.19
## TempF 1.00 1.00 0.91 0.91 -0.38 -0.48 0.13 0.19
## DewC 0.91 0.91 1.00 1.00 -0.38 -0.48 -0.01 0.07
## DewF 0.91 0.91 1.00 1.00 -0.38 -0.48 -0.01 0.07
## Altimeter -0.38 -0.38 -0.38 -0.38 1.00 0.99 -0.26 0.07
## modSLP -0.48 -0.48 -0.48 -0.48 0.99 1.00 -0.25 0.04
## WindSpeed 0.13 0.13 -0.01 -0.01 -0.26 -0.25 1.00 -0.01
## Visibility 0.19 0.19 0.07 0.07 0.07 0.04 -0.01 1.00
corrplot::corrplot(mtxCorr, method="color", title="Lincoln, NE Hourly Weather Correlations (2016)")
# Create a function for plotting two variables against each other
plotNumCor <- function(var1, var2, title=NULL) {
if (is.null(title))
{ title <- paste0("Lincoln, NE (2016) Hourly Correlations of ", var1, " and ", var2) }
p <- dfParse %>%
group_by_at(vars(all_of(c(var1, var2)))) %>%
summarize(n=n()) %>%
ggplot(aes_string(x=var1, y=var2)) +
geom_point(alpha=0.5, aes_string(size="n")) +
geom_smooth(method="lm", aes_string(weight="n")) +
labs(x=var1, y=var2, title=title)
print(p)
}
# The three linear or almost linear relationships
plotNumCor("TempC", "TempF")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
plotNumCor("DewC", "DewF")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
plotNumCor("Altimeter", "modSLP")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
# Strongly and positively related
plotNumCor("TempF", "DewF")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
# Moderately negatively correlated
plotNumCor("TempF", "Altimeter")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
plotNumCor("TempF", "modSLP")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
plotNumCor("Altimeter", "WindSpeed")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
# Predict modSLP from Altimeter
lmSLP1 <- lm(modSLP ~ Altimeter, data=dfParse)
lmSLP2 <- lm(modSLP ~ Altimeter + TempF, data=dfParse)
summary(lmSLP1)
##
## Call:
## lm(formula = modSLP ~ Altimeter, data = dfParse)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8890 -0.8172 -0.1578 0.7610 2.7890
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.233e+01 1.406e+00 -44.34 <2e-16 ***
## Altimeter 3.594e-01 4.683e-04 767.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9691 on 8788 degrees of freedom
## (23 observations deleted due to missingness)
## Multiple R-squared: 0.9853, Adjusted R-squared: 0.9853
## F-statistic: 5.889e+05 on 1 and 8788 DF, p-value: < 2.2e-16
summary(lmSLP2)
##
## Call:
## lm(formula = modSLP ~ Altimeter + TempF, data = dfParse)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.24130 -0.26737 0.00686 0.25214 1.23326
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.002e+01 5.615e-01 -17.84 <2e-16 ***
## Altimeter 3.428e-01 1.857e-04 1845.63 <2e-16 ***
## TempF -4.559e-02 1.921e-04 -237.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3561 on 8787 degrees of freedom
## (23 observations deleted due to missingness)
## Multiple R-squared: 0.998, Adjusted R-squared: 0.998
## F-statistic: 2.209e+06 on 2 and 8787 DF, p-value: < 2.2e-16
# Plot predictions vs. actual (model 1)
dfParse %>%
filter(!is.na(modSLP)) %>%
mutate(pred1=predict(lmSLP1)) %>%
count(modSLP, pred1) %>%
ggplot(aes(x=modSLP, y=pred1)) +
geom_point(alpha=0.25, aes(size=n)) +
geom_smooth(method="lm", aes(weight=n)) +
labs(title="Predicted vs. Actual Sea Level Pressure - Altitude Only as Predictor",
subtitle="Lincoln, NE (2016) Hourly METAR", x="Sea Level Pressure", y="Predicted"
)
# Plot predictions vs. actual (model 2)
dfParse %>%
filter(!is.na(modSLP)) %>%
mutate(pred2=predict(lmSLP2)) %>%
count(modSLP, pred2) %>%
ggplot(aes(x=modSLP, y=pred2)) +
geom_point(alpha=0.25, aes(size=n)) +
geom_smooth(method="lm", aes(weight=n)) +
labs(title="Predicted vs. Actual Sea Level Pressure - Altitude and Temperature as Predictor",
subtitle="Lincoln, NE (2016) Hourly METAR", x="Sea Level Pressure", y="Predicted"
)
Cloud data is also included in the METAR, with the type of clouds being described as:
The ceiling is considered the lowest height that is measured as any of OVC, BKN, or VV.
Example code includes:
# Extract the CLR records
mtxCLR <- str_extract_all(metarKLNK$metar, pattern=" CLR ", simplify=TRUE)
if (dim(mtxCLR)[[2]] != 1) { stop("Extracted 2+ CLR from some METAR; investigate") }
isCLR <- ifelse(mtxCLR[, 1] == "", 0, 1)
# Extract the VV records
mtxVV <- str_extract_all(metarKLNK$metar, pattern="VV(\\d{3})", simplify=TRUE)
if (dim(mtxVV)[[2]] != 1) { stop("Extracted 2+ VV from some METAR; investigate") }
isVV <- ifelse(mtxVV[, 1] == "", 0, 1)
htVV <- ifelse(mtxVV[, 1] == "", NA, as.integer(str_replace(mtxVV[, 1], "VV", ""))*100)
# Extract the FEW records
mtxFEW <- str_extract_all(metarKLNK$metar, pattern="FEW(\\d{3})", simplify=TRUE)
numFEW <- apply(mtxFEW, 1, FUN=function(x) { sum((x!=""))} )
# Extract the SCT records
mtxSCT <- str_extract_all(metarKLNK$metar, pattern="SCT(\\d{3})", simplify=TRUE)
numSCT <- apply(mtxSCT, 1, FUN=function(x) { sum((x!=""))} )
# Extract the BKN records
mtxBKN <- str_extract_all(metarKLNK$metar, pattern="BKN(\\d{3})", simplify=TRUE)
numBKN <- apply(mtxBKN, 1, FUN=function(x) { sum((x!=""))} )
# Extract the OVC records
mtxOVC <- str_extract_all(metarKLNK$metar, pattern="OVC(\\d{3})", simplify=TRUE)
numOVC <- apply(mtxOVC, 1, FUN=function(x) { sum((x!=""))} )
# Summarize as a data frame
tblClouds <- tibble::tibble(isCLR=isCLR, isVV=isVV, htVV=htVV, numFEW=numFEW,
numSCT=numSCT, numBKN=numBKN, numOVC=numOVC
)
# Get the counts
# As expected, if isCLR then nothing else, and if isVV then nothing else
tblClouds %>%
count(isCLR, isVV, numFEW, numSCT, numBKN, numOVC) %>%
as.data.frame()
## isCLR isVV numFEW numSCT numBKN numOVC n
## 1 0 0 0 0 0 0 6
## 2 0 0 0 0 0 1 1389
## 3 0 0 0 0 1 0 307
## 4 0 0 0 0 1 1 250
## 5 0 0 0 0 2 0 50
## 6 0 0 0 0 2 1 45
## 7 0 0 0 0 3 0 8
## 8 0 0 0 1 0 0 230
## 9 0 0 0 1 0 1 73
## 10 0 0 0 1 1 0 52
## 11 0 0 0 1 1 1 42
## 12 0 0 0 1 2 0 17
## 13 0 0 0 2 0 0 16
## 14 0 0 0 2 0 1 9
## 15 0 0 0 2 1 0 6
## 16 0 0 1 0 0 0 380
## 17 0 0 1 0 0 1 90
## 18 0 0 1 0 1 0 46
## 19 0 0 1 0 1 1 62
## 20 0 0 1 0 2 0 9
## 21 0 0 1 1 0 0 39
## 22 0 0 1 1 0 1 24
## 23 0 0 1 1 1 0 28
## 24 0 0 1 2 0 0 10
## 25 0 0 2 0 0 0 24
## 26 0 0 2 0 0 1 6
## 27 0 0 2 0 1 0 5
## 28 0 0 2 1 0 0 3
## 29 0 0 3 0 0 0 2
## 30 0 1 0 0 0 0 33
## 31 1 0 0 0 0 0 5552
# Investigate the problem data
metarKLNK$metar[rowSums(tblClouds, na.rm=TRUE)==0]
## [1] "KLNK 301854Z 17011KT 10SM 30/18 A2993 RMK AO2 SLP124 T03000178 $"
## [2] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"
## [3] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"
## [4] "KLNK 161554Z 09009KT 10SM 26/19 A3007 RMK AO2 SLP171 T02610194 $"
## [5] "KLNK 211954Z 25008KT 210V270 10SM 27/10 A3005 RMK AO2 SLP170 T02720100 $"
## [6] "KLNK 050854Z 00000KT 1/2SM R36/3000VP6000FT FG 09/09 A2978 RMK AO2 SLP081 T00940094 51008 $"
# Get the counts of most obscuration
tblClouds %>%
filter(rowSums(., na.rm=TRUE) > 0) %>%
mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC",
numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW",
TRUE ~ "Error"
), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
)
) %>%
ggplot(aes(x=wType, y=..count../sum(..count..))) +
geom_bar() +
labs(title="Highest Obscuration by Cloud - Lincoln, NE (2016)", x="Cloud Type",
y="Proportion of Hourly Measurements"
)
# Integrate the clouds data
mtxCloud <- cbind(mtxVV, mtxOVC, mtxBKN, mtxSCT, mtxFEW, mtxCLR)
# Cycle through to find levels of a given type
ckClouds <- function(cloudType) {
isKey <- which(apply(mtxCloud, 2, FUN=function(x) {sum(str_detect(x, cloudType))}) > 0)
as.integer(str_replace(mtxCloud[, min(isKey)], cloudType, "")) * 100
}
lowOVC <- ckClouds("OVC")
lowVV <- ckClouds("VV")
lowBKN <- ckClouds("BKN")
lowSCT <- ckClouds("SCT")
lowFEW <- ckClouds("FEW")
# Integrate the lowest cloud type by level
lowCloud <- tibble::tibble(lowVV, lowOVC, lowBKN, lowSCT, lowFEW)
lowCloud
## # A tibble: 8,813 x 5
## lowVV lowOVC lowBKN lowSCT lowFEW
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NA 2800 NA NA NA
## 2 NA 2700 NA NA NA
## 3 NA 2600 NA NA NA
## 4 NA 2700 NA NA NA
## 5 NA 2700 NA 2100 NA
## 6 NA 2700 NA NA NA
## 7 NA 2700 NA NA NA
## 8 NA 2700 NA NA NA
## 9 NA NA NA NA 2600
## 10 NA NA NA NA NA
## # ... with 8,803 more rows
# Get the lowest cloud level
minCloud <- lowCloud
minCloud[is.na(minCloud)] <- 999999
minCloudLevel <- apply(minCloud, 1, FUN=min)
minCeilingLevel <- apply(minCloud[, c("lowVV", "lowOVC", "lowBKN")], 1, FUN=min)
noCloudPct <- mean(minCloudLevel == 999999)
noCeilingPct <- mean(minCeilingLevel == 999999)
# Plot the minimum cloud level (where it exists)
data.frame(minCloudLevel, minCeilingLevel) %>%
filter(minCloudLevel != 999999) %>%
ggplot(aes(x=minCloudLevel)) +
geom_bar(aes(y=..count../sum(..count..))) +
geom_text(aes(x=2500, y=0.04,
label=paste0(round(100*noCloudPct), "% of obs. have no clouds")
)
) +
labs(x="Height [ft]", y="Proportion", title="Minimum Cloud Height (when some clouds exist)",
subtitle="Lincoln, NE (2016)"
)
# Plot the minimum ceiling level (where it exists)
data.frame(minCloudLevel, minCeilingLevel) %>%
filter(minCeilingLevel != 999999) %>%
ggplot(aes(x=minCeilingLevel)) +
geom_bar(aes(y=..count../sum(..count..))) +
geom_text(aes(x=2500, y=0.04,
label=paste0(round(100*noCeilingPct), "% of obs. have no ceiling")
)
) +
labs(x="Height [ft]", y="Proportion", title="Minimum Ceiling Height (when a ceiling exists)",
subtitle="Lincoln, NE (2016)"
)
The month of the year is an interesting data point for plotting against.
Example code includes:
# Integrate the cloud data and convert month to a factor
dfFull <- cbind(dfParse, tblClouds, lowCloud) %>%
mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC",
numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW",
TRUE ~ "Error"
), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
),
month=factor(month, levels=1:12, labels=month.abb)
)
dfFull <- tibble::as_tibble(dfFull)
str(dfFull)
## Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 28 variables:
## $ METAR : chr "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## $ WindDir : chr "300" "000" "000" "280" ...
## $ WindSpeed : int 5 0 0 3 5 9 0 3 0 0 ...
## $ WindGust : num NA NA NA NA NA NA NA NA NA NA ...
## $ Dummy : chr " " " " " " " " ...
## $ Visibility: num 10 10 10 10 10 10 10 10 10 10 ...
## $ TempC : int -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
## $ DewC : int -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
## $ Altimeter : int 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
## $ SLP : int 275 277 277 281 286 289 290 291 295 295 ...
## $ FahrC : chr "T10281067" "T10331067" "T10281067" "T10281061" ...
## $ TempF : num 27 26.1 27 27 27 ...
## $ DewF : num 19.9 19.9 19.9 21 19.9 ...
## $ modSLP : num 1028 1028 1028 1028 1029 ...
## $ month : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
## $ isCLR : num 0 0 0 0 0 0 0 0 0 1 ...
## $ isVV : num 0 0 0 0 0 0 0 0 0 0 ...
## $ htVV : num NA NA NA NA NA NA NA NA NA NA ...
## $ numFEW : int 0 0 0 0 0 0 0 0 1 0 ...
## $ numSCT : int 0 0 0 0 1 0 0 0 0 0 ...
## $ numBKN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ numOVC : int 1 1 1 1 1 1 1 1 0 0 ...
## $ lowVV : num NA NA NA NA NA NA NA NA NA NA ...
## $ lowOVC : num 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
## $ lowBKN : num NA NA NA NA NA NA NA NA NA NA ...
## $ lowSCT : num NA NA NA NA 2100 NA NA NA NA NA ...
## $ lowFEW : num NA NA NA NA NA NA NA NA 2600 NA ...
## $ wType : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
# Run the boxplot of a factor against a numeric variable
plotFactorNumeric <- function(fctVar, numVar, title=NULL) {
if (is.null(title)) { title <- paste0("Lincoln, NE (2016) Hourly Weather - ", numVar, " vs. ", fctVar) }
p <- dfFull %>%
filter(!is.na(get(fctVar)), !is.na(get(numVar))) %>%
ggplot(aes_string(x=fctVar, y=numVar)) +
geom_boxplot(fill="lightblue") +
labs(title=title)
print(p)
}
# Run for all of the key variables against wind speed and cloud type
keyVar <- c("WindSpeed", "Visibility", "Altimeter", "TempF", "DewF")
for (var in keyVar) { plotFactorNumeric("month", var) }
for (var in keyVar) { plotFactorNumeric("wType", var) }
# Create stacked bars for cloud type by month
dfFull %>%
filter(!is.na(wType), wType!="Error") %>%
ggplot(aes(x=month, fill=wType)) +
geom_bar(position="fill") +
labs(title="Lincoln, NE (2016)", x="", y="Proportion of Month")
Example 12 can be converted to functional form so that the process can be applied to other reporting stations and time periods.
Example code includes:
# Function to make an initial read of the data, filter to METAR records, and check date-times
readMETAR <- function(fileName="./RInputFiles/metar_klnk_2016.txt", timeZ="54Z",
expMin=as.POSIXct("2015-12-31 00:54:00", tz="UTC"), expDays=365
) {
# Load METAR data
initRead <- readr::read_csv(fileName, na=c("", "NA", "M"))
str(initRead, give.attr=FALSE)
# Filter to only data that ends with times ending in 54Z
filterRead <- initRead %>%
filter(str_detect(metar, timeZ))
dim(filterRead)
# Check that the dates and times included are as expected
expDate <- expMin + lubridate::hours(0:(24*expDays - 1))
# Observations expected but not recorded
cat("\n*** OBSERVATIONS EXPECTED BUT NOT RECORDED ***\n")
print(as.POSIXct(setdiff(expDate, filterRead$valid), origin="1970-01-01", tz="UTC"))
# Observations recorded but not expected
cat("\n*** OBSERVATIONS RECORDED BUT NOT EXPECTED ***\n")
print(as.POSIXct(setdiff(filterRead$valid, expDate), origin="1970-01-01", tz="UTC"))
# Confirmation of uniqueness
cat("\n*** Are the extracted records unique? ***\n")
print(length(unique(filterRead$valid)) == length(filterRead$valid))
cat("\n")
# Return the dataset as a tibble
tibble::as_tibble(filterRead)
}
funcMETAR <- readMETAR(expDays=368)
## Parsed with column specification:
## cols(
## .default = col_double(),
## station = col_character(),
## valid = col_datetime(format = ""),
## p01i = col_character(),
## skyc1 = col_character(),
## skyc2 = col_character(),
## skyc3 = col_character(),
## skyc4 = col_logical(),
## skyl4 = col_logical(),
## wxcodes = col_character(),
## ice_accretion_1hr = col_character(),
## ice_accretion_3hr = col_character(),
## ice_accretion_6hr = col_character(),
## peak_wind_time = col_datetime(format = ""),
## metar = col_character()
## )
## See spec(...) for full column specifications.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10594 obs. of 29 variables:
## $ station : chr "LNK" "LNK" "LNK" "LNK" ...
## $ valid : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
## $ tmpf : num 27 26.1 27 27 27 ...
## $ dwpf : num 19.9 19.9 19.9 21 19.9 ...
## $ relh : num 74.5 77.3 74.5 78 74.5 ...
## $ drct : num 300 0 0 280 310 10 0 10 20 0 ...
## $ sknt : num 5 0 0 3 5 9 0 3 3 0 ...
## $ p01i : chr "0.00" "0.00" "0.00" "0.00" ...
## $ alti : num 30.3 30.3 30.3 30.3 30.3 ...
## $ mslp : num 1028 1028 1028 1028 1029 ...
## $ vsby : num 10 10 10 10 10 10 10 10 10 10 ...
## $ gust : num NA NA NA NA NA NA NA NA NA NA ...
## $ skyc1 : chr "OVC" "OVC" "OVC" "OVC" ...
## $ skyc2 : chr NA NA NA NA ...
## $ skyc3 : chr NA NA NA NA ...
## $ skyc4 : logi NA NA NA NA NA NA ...
## $ skyl1 : num 2800 2700 2600 2700 2100 2700 2700 2700 2600 2600 ...
## $ skyl2 : num NA NA NA NA 2700 NA NA NA NA NA ...
## $ skyl3 : num NA NA NA NA NA NA NA NA NA NA ...
## $ skyl4 : logi NA NA NA NA NA NA ...
## $ wxcodes : chr NA NA NA NA ...
## $ ice_accretion_1hr: chr NA NA NA NA ...
## $ ice_accretion_3hr: chr NA NA NA NA ...
## $ ice_accretion_6hr: chr NA NA NA NA ...
## $ peak_wind_gust : num NA NA NA NA NA NA NA NA NA NA ...
## $ peak_wind_drct : num NA NA NA NA NA NA NA NA NA NA ...
## $ peak_wind_time : POSIXct, format: NA NA ...
## $ feel : num 20.4 26.1 27 22.9 20.4 ...
## $ metar : chr "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
## [1] "2016-01-19 11:54:00 UTC" "2016-05-06 11:54:00 UTC"
## [3] "2016-05-06 12:54:00 UTC" "2016-06-17 23:54:00 UTC"
## [5] "2016-06-18 00:54:00 UTC" "2016-06-18 07:54:00 UTC"
## [7] "2016-07-02 15:54:00 UTC" "2016-07-13 14:54:00 UTC"
## [9] "2016-07-13 15:54:00 UTC" "2016-07-13 16:54:00 UTC"
## [11] "2016-07-13 17:54:00 UTC" "2016-07-30 13:54:00 UTC"
## [13] "2016-08-02 07:54:00 UTC" "2016-08-05 07:54:00 UTC"
## [15] "2016-08-29 21:54:00 UTC" "2016-09-15 16:54:00 UTC"
## [17] "2016-09-16 05:54:00 UTC" "2016-11-21 00:54:00 UTC"
## [19] "2016-12-03 08:54:00 UTC"
##
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
##
## *** Are the extracted records unique? ***
## [1] TRUE
funcMETAR
## # A tibble: 8,813 x 29
## station valid tmpf dwpf relh drct sknt p01i alti mslp
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 LNK 2015-12-31 00:54:00 27.0 19.9 74.5 300 5 0.00 30.3 1028.
## 2 LNK 2015-12-31 01:54:00 26.1 19.9 77.3 0 0 0.00 30.3 1028.
## 3 LNK 2015-12-31 02:54:00 27.0 19.9 74.5 0 0 0.00 30.3 1028.
## 4 LNK 2015-12-31 03:54:00 27.0 21.0 78 280 3 0.00 30.3 1028.
## 5 LNK 2015-12-31 04:54:00 27.0 19.9 74.5 310 5 0.00 30.3 1029.
## 6 LNK 2015-12-31 05:54:00 21.0 14 73.9 10 9 0.00 30.3 1029.
## 7 LNK 2015-12-31 06:54:00 19.0 12.0 73.7 0 0 0.00 30.3 1029
## 8 LNK 2015-12-31 07:54:00 18.0 12.0 77.2 10 3 0.00 30.3 1029.
## 9 LNK 2015-12-31 08:54:00 14 10.0 83.9 0 0 0.00 30.3 1030.
## 10 LNK 2015-12-31 09:54:00 16.0 10.9 80.1 0 0 0.00 30.4 1030.
## # ... with 8,803 more rows, and 19 more variables: vsby <dbl>, gust <dbl>,
## # skyc1 <chr>, skyc2 <chr>, skyc3 <chr>, skyc4 <lgl>, skyl1 <dbl>,
## # skyl2 <dbl>, skyl3 <dbl>, skyl4 <lgl>, wxcodes <chr>,
## # ice_accretion_1hr <chr>, ice_accretion_3hr <chr>, ice_accretion_6hr <chr>,
## # peak_wind_gust <dbl>, peak_wind_drct <dbl>, peak_wind_time <dttm>,
## # feel <dbl>, metar <chr>
# Extract wind speeds and direction
# The general wind format is dddssGssKT where ddd is the direction (VRB meaning variable), the main ss is the speed, and the Gss is the gust speed (optional and not always displayed)
extractWind <- function(met) {
mtxWind <- met %>%
pull(metar) %>%
str_match(pattern="(\\d{3}|VRB)(\\d{2})(G\\d{2})?KT")
cat("\n*** First 6 winds and parsing ***\n")
print(head(mtxWind))
cat("\n*** Table of WIND DIRECTION ***\n")
print(table(mtxWind[, 2], useNA="ifany"))
cat("\n*** Table of WIND SPEED ***\n")
print(table(mtxWind[, 3], useNA="ifany"))
cat("\n*** Table of WIND GUST ***\n")
print(table(mtxWind[, 4], useNA="ifany"))
# Verify that winds not captured are in fact missing from the METAR
cat("\n *** WIND DATA WAS NOT CAPTURED FROM: *** \n")
print(met[which(is.na(mtxWind[, 2])), "metar"])
cat("\n")
met %>%
mutate(dirW=mtxWind[, 2],
spdW=as.numeric(mtxWind[, 3]),
gustW=as.numeric(str_replace(mtxWind[, 4], "G", ""))
)
}
windMETAR <- extractWind(funcMETAR)
##
## *** First 6 winds and parsing ***
## [,1] [,2] [,3] [,4]
## [1,] "30005KT" "300" "05" NA
## [2,] "00000KT" "000" "00" NA
## [3,] "00000KT" "000" "00" NA
## [4,] "28003KT" "280" "03" NA
## [5,] "31005KT" "310" "05" NA
## [6,] "01009KT" "010" "09" NA
##
## *** Table of WIND DIRECTION ***
##
## 000 010 020 030 040 050 060 070 080 090 100 110 120 130 140 150
## 875 269 199 146 135 121 108 95 88 65 102 158 169 245 241 339
## 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 310
## 463 565 517 413 284 179 142 96 73 80 105 89 121 141 147 225
## 320 330 340 350 360 VRB <NA>
## 234 303 352 413 383 114 19
##
## *** Table of WIND SPEED ***
##
## 00 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17
## 875 615 704 664 696 651 636 599 536 451 439 371 315 276 235 173
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 34
## 156 108 69 62 45 33 23 13 14 9 10 6 6 2 1 1
## <NA>
## 19
##
## *** Table of WIND GUST ***
##
## G14 G15 G16 G17 G18 G19 G20 G21 G22 G23 G24 G25 G26 G27 G28 G29
## 6 11 11 16 30 59 69 87 83 107 101 83 62 61 61 37
## G30 G31 G32 G33 G34 G35 G36 G37 G38 G39 G40 G41 G42 G43 G45 <NA>
## 28 24 18 13 12 14 6 6 10 11 2 3 1 2 2 7777
##
## *** WIND DATA WAS NOT CAPTURED FROM: ***
## # A tibble: 19 x 1
## metar
## <chr>
## 1 KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028
## 2 KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083
## 3 KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013
## 4 KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006~
## 5 KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $
## 6 KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007
## 7 KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58~
## 8 KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $
## 9 KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106
## 10 KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200
## 11 KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $
## 12 KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222
## 13 KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001
## 14 KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010
## 15 KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183
## 16 KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100
## 17 KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $
## 18 KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067
## 19 KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050
windMETAR
## # A tibble: 8,813 x 32
## station valid tmpf dwpf relh drct sknt p01i alti mslp
## <chr> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 LNK 2015-12-31 00:54:00 27.0 19.9 74.5 300 5 0.00 30.3 1028.
## 2 LNK 2015-12-31 01:54:00 26.1 19.9 77.3 0 0 0.00 30.3 1028.
## 3 LNK 2015-12-31 02:54:00 27.0 19.9 74.5 0 0 0.00 30.3 1028.
## 4 LNK 2015-12-31 03:54:00 27.0 21.0 78 280 3 0.00 30.3 1028.
## 5 LNK 2015-12-31 04:54:00 27.0 19.9 74.5 310 5 0.00 30.3 1029.
## 6 LNK 2015-12-31 05:54:00 21.0 14 73.9 10 9 0.00 30.3 1029.
## 7 LNK 2015-12-31 06:54:00 19.0 12.0 73.7 0 0 0.00 30.3 1029
## 8 LNK 2015-12-31 07:54:00 18.0 12.0 77.2 10 3 0.00 30.3 1029.
## 9 LNK 2015-12-31 08:54:00 14 10.0 83.9 0 0 0.00 30.3 1030.
## 10 LNK 2015-12-31 09:54:00 16.0 10.9 80.1 0 0 0.00 30.4 1030.
## # ... with 8,803 more rows, and 22 more variables: vsby <dbl>, gust <dbl>,
## # skyc1 <chr>, skyc2 <chr>, skyc3 <chr>, skyc4 <lgl>, skyl1 <dbl>,
## # skyl2 <dbl>, skyl3 <dbl>, skyl4 <lgl>, wxcodes <chr>,
## # ice_accretion_1hr <chr>, ice_accretion_3hr <chr>, ice_accretion_6hr <chr>,
## # peak_wind_gust <dbl>, peak_wind_drct <dbl>, peak_wind_time <dttm>,
## # feel <dbl>, metar <chr>, dirW <chr>, spdW <dbl>, gustW <dbl>
# Generate basic wind plots
basicWindPlots <- function(met, desc="Lincoln, NE", gran="KLNK METAR (2016)") {
# Plot for the wind direction
wDir <- met %>%
ggplot(aes(x=dirW)) +
geom_bar() +
labs(title=paste0(desc, " Wind Direction"), subtitle=gran,
y="# Hourly Observations", x="Wind Direction"
)
print(wDir)
# Plot for the minimum, average, and maximum wind speed by wind direction
# Wind direction 000 is reserved for 0 KT wind, while VRB is reserved for 3-6 KT variable winds
wSpeedByDir <- met %>%
filter(!is.na(dirW)) %>%
group_by(dirW) %>%
summarize(minWind=min(spdW), meanWind=mean(spdW), maxWind=max(spdW)) %>%
ggplot(aes(x=dirW)) +
geom_point(aes(y=meanWind), color="red", size=2) +
geom_errorbar(aes(ymin=minWind, ymax=maxWind)) +
labs(title=paste0(desc, " Wind Speed (Max, Mean, Min) By Wind Direction"), subtitle=gran,
y="Wind Speed [KT]", x="Wind Direction"
)
print(wSpeedByDir)
# Plot for the wind speed
pctZero <- sum(met$spdW==0, na.rm=TRUE) / length(met$spdW)
wSpeed <- met %>%
ggplot(aes(x=spdW)) +
geom_bar(aes(y=..count../sum(..count..))) +
labs(title=paste0(round(100*pctZero), "% of wind speeds in ", desc, " measure 0 Knots"),
subtitle=gran,
y="% Hourly Observations", x="Wind Speed {KT]"
)
print(wSpeed)
wPolarDirSpeed <- met %>%
filter(!is.na(dirW), dirW != "VRB", dirW != "000") %>%
mutate(dirW=as.numeric(dirW)) %>%
group_by(dirW, spdW) %>%
summarize(n=n()) %>%
ggplot(aes(x=spdW, y=dirW)) +
geom_point(alpha=0.1, aes(size=n)) +
coord_polar(theta="y") +
labs(title=paste0(desc, " Direction vs. Wind Speed"), subtitle=gran, x="Wind Speed [KT]") +
scale_y_continuous(limits=c(0, 360), breaks=c(0, 90, 180, 270, 360)) +
scale_x_continuous(limits=c(0, 30), breaks=c(0, 5, 10, 15, 20, 25, 30)) +
geom_point(aes(x=0, y=0), color="red", size=2)
print(wPolarDirSpeed)
}
basicWindPlots(windMETAR)
## Warning: Removed 19 rows containing non-finite values (stat_count).
## Warning: Removed 4 rows containing missing values (geom_point).
METAR parsing can also be converted to a functional form. This will need to be modified to be more general, since the codes used for a few things like clouds can vary from station to station.
Example code includes:
# Code for the initial METAR parsing
initialParseMETAR <- function(met, val, labs) {
# Pull the METAR data
metAll <- met %>%
pull(metar)
# Find the number of matching elements
cat("\n*** Tentative Summary of Element Parsing *** \n")
str_detect(metAll, pattern=val) %>%
table() %>%
print()
# The strings that do not match have errors in the raw data (typically, missing wind speed)
cat("\n*** Data Not Matched *** \n")
print(metAll[!str_detect(metAll, pattern=val)])
# A matrix of string matches can be obtained
mtxParse <- str_match(metAll, pattern=val)
cat("\n*** Parsing matrix summary *** \n")
print(dim(mtxParse))
print(head(mtxParse))
# Create a data frame
dfParse <- data.frame(mtxParse, stringsAsFactors=FALSE) %>%
mutate(dtime=met$valid, origMETAR=met$metar)
names(dfParse) <- c(labs, "dtime", "origMETAR")
dfParse <- tibble::as_tibble(dfParse)
cat("\n*** Summary of the parsed data *** \n")
glimpse(dfParse)
dfParse
}
# Create a search string for METAR
valMet <- "54Z.*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})"
# Create the names for the search string to parse in to
labsMet <- c("METAR", "WindDir", "WindSpeed", "WindGust", "Dummy", "Visibility",
"TempC", "DewC", "Altimeter", "SLP", "FahrC"
)
# Run the METAR parsing on the raw data
initMETAR <- initialParseMETAR(funcMETAR, val=valMet, labs=labsMet)
##
## *** Tentative Summary of Element Parsing ***
## .
## FALSE TRUE
## 23 8790
##
## *** Data Not Matched ***
## [1] "KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028"
## [2] "KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083"
## [3] "KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013"
## [4] "KLNK 221654Z 19007KT CLR 15/03 A2956 RMK AO2 SLP006 T01500033 $"
## [5] "KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006 $"
## [6] "KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $"
## [7] "KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007"
## [8] "KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58011"
## [9] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"
## [10] "KLNK 011554Z 32007KT CLR 22/09 A3008 RMK AO2 SLP179 T02170089 $"
## [11] "KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106"
## [12] "KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200"
## [13] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"
## [14] "KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $"
## [15] "KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222"
## [16] "KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001"
## [17] "KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010"
## [18] "KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183"
## [19] "KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100"
## [20] "KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $"
## [21] "KLNK 201554Z 15014G19KT CLR 27/22 A3007 RMK AO2 SLP174 T02670217 $"
## [22] "KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067"
## [23] "KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050"
##
## *** Parsing matrix summary ***
## [1] 8813 11
## [,1]
## [1,] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067"
## [2,] "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067"
## [3,] "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067"
## [4,] "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061"
## [5,] "54Z 31005KT 10SM SCT021 OVC027 M03/M07 A3033 RMK AO2 SLP286 T10281067"
## [6,] "54Z AUTO 01009KT 10SM OVC027 M06/M10 A3033 RMK AO2 SLP289 T10611100"
## [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
## [1,] "300" "05" NA " " "10SM" "M03" "M07" "A3029" "SLP275" "T10281067"
## [2,] "000" "00" NA " " "10SM" "M03" "M07" "A3030" "SLP277" "T10331067"
## [3,] "000" "00" NA " " "10SM" "M03" "M07" "A3030" "SLP277" "T10281067"
## [4,] "280" "03" NA " " "10SM" "M03" "M06" "A3031" "SLP281" "T10281061"
## [5,] "310" "05" NA " " "10SM" "M03" "M07" "A3033" "SLP286" "T10281067"
## [6,] "010" "09" NA " " "10SM" "M06" "M10" "A3033" "SLP289" "T10611100"
##
## *** Summary of the parsed data ***
## Observations: 8,813
## Variables: 13
## $ METAR <chr> "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T1...
## $ WindDir <chr> "300", "000", "000", "280", "310", "010", "000", "010", ...
## $ WindSpeed <chr> "05", "00", "00", "03", "05", "09", "00", "03", "00", "0...
## $ WindGust <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", ...
## $ TempC <chr> "M03", "M03", "M03", "M03", "M03", "M06", "M07", "M08", ...
## $ DewC <chr> "M07", "M07", "M07", "M06", "M07", "M10", "M11", "M11", ...
## $ Altimeter <chr> "A3029", "A3030", "A3030", "A3031", "A3033", "A3033", "A...
## $ SLP <chr> "SLP275", "SLP277", "SLP277", "SLP281", "SLP286", "SLP28...
## $ FahrC <chr> "T10281067", "T10331067", "T10281067", "T10281061", "T10...
## $ dtime <dttm> 2015-12-31 00:54:00, 2015-12-31 01:54:00, 2015-12-31 02...
## $ origMETAR <chr> "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 ...
initMETAR
## # A tibble: 8,813 x 13
## METAR WindDir WindSpeed WindGust Dummy Visibility TempC DewC Altimeter SLP
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 54Z ~ 300 05 <NA> " " 10SM M03 M07 A3029 SLP2~
## 2 54Z ~ 000 00 <NA> " " 10SM M03 M07 A3030 SLP2~
## 3 54Z ~ 000 00 <NA> " " 10SM M03 M07 A3030 SLP2~
## 4 54Z ~ 280 03 <NA> " " 10SM M03 M06 A3031 SLP2~
## 5 54Z ~ 310 05 <NA> " " 10SM M03 M07 A3033 SLP2~
## 6 54Z ~ 010 09 <NA> " " 10SM M06 M10 A3033 SLP2~
## 7 54Z ~ 000 00 <NA> " " 10SM M07 M11 A3034 SLP2~
## 8 54Z ~ 010 03 <NA> " " 10SM M08 M11 A3034 SLP2~
## 9 54Z ~ 000 00 <NA> " " 10SM M10 M12 A3034 SLP2~
## 10 54Z ~ 000 00 <NA> " " 10SM M09 M12 A3035 SLP2~
## # ... with 8,803 more rows, and 3 more variables: FahrC <chr>, dtime <dttm>,
## # origMETAR <chr>
# Helper function for generating plots by key variables
plotcountsByMetric <- function(df, mets) {
# Plot of counts by key metric
for (x in mets) {
p <- df %>%
group_by_at(vars(all_of(x))) %>%
summarize(n=n()) %>%
ggplot(aes_string(x=x, y="n")) +
geom_col() +
labs(title=x, y="Count")
print(p)
}
}
# Code for the conversion of METAR to meaningful numeric
# Should make this much more general later
convertMETAR <- function(met, metrics, seed=2003211416) {
# Convert to numeric where appropriate
dfParse <- met %>%
mutate(WindSpeed = as.integer(WindSpeed),
WindGust = as.numeric(WindGust),
Visibility = as.numeric(str_replace(Visibility, "SM", "")),
TempC = as.integer(str_replace(TempC, "M", "-")),
DewC = as.integer(str_replace(DewC, "M", "-")),
Altimeter = as.integer(str_replace(Altimeter, "A", "")),
SLP = as.integer(str_replace(SLP, "SLP", "")),
TempF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 2, 5), pattern="^1", "-"))/10,
DewF = 32 + 1.8 * as.integer(str_replace(str_sub(FahrC, 6, 9), pattern="^1", "-"))/10
)
# Investigate the data
cat("\n *** Parsed data structure, head, tail, and random sample *** \n")
str(dfParse)
print(head(dfParse))
print(tail(dfParse))
set.seed(seed)
dfParse %>%
sample_n(20) %>%
print()
# Check for NA values
cat("\n *** Number of NA values *** \n")
print(colSums(is.na(dfParse)))
# Plot of counts by key metric
plotcountsByMetric(dfParse, mets=metrics)
# Return the parsed dataset
dfParse
}
keyMetric <- c("WindDir", "WindSpeed", "WindGust", "Visibility", "TempC",
"DewC", "Altimeter", "SLP", "TempF", "DewF"
)
convMETAR <- convertMETAR(initMETAR, metrics=keyMetric)
## Warning: NAs introduced by coercion
##
## *** Parsed data structure, head, tail, and random sample ***
## Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 15 variables:
## $ METAR : chr "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## $ WindDir : chr "300" "000" "000" "280" ...
## $ WindSpeed : int 5 0 0 3 5 9 0 3 0 0 ...
## $ WindGust : num NA NA NA NA NA NA NA NA NA NA ...
## $ Dummy : chr " " " " " " " " ...
## $ Visibility: num 10 10 10 10 10 10 10 10 10 10 ...
## $ TempC : int -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
## $ DewC : int -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
## $ Altimeter : int 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
## $ SLP : int 275 277 277 281 286 289 290 291 295 295 ...
## $ FahrC : chr "T10281067" "T10331067" "T10281067" "T10281061" ...
## $ dtime : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
## $ origMETAR : chr "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## $ TempF : num 27 26.1 27 27 27 ...
## $ DewF : num 19.9 19.9 19.9 21 19.9 ...
## # A tibble: 6 x 15
## METAR WindDir WindSpeed WindGust Dummy Visibility TempC DewC Altimeter SLP
## <chr> <chr> <int> <dbl> <chr> <dbl> <int> <int> <int> <int>
## 1 54Z ~ 300 5 NA " " 10 -3 -7 3029 275
## 2 54Z ~ 000 0 NA " " 10 -3 -7 3030 277
## 3 54Z ~ 000 0 NA " " 10 -3 -7 3030 277
## 4 54Z ~ 280 3 NA " " 10 -3 -6 3031 281
## 5 54Z ~ 310 5 NA " " 10 -3 -7 3033 286
## 6 54Z ~ 010 9 NA " " 10 -6 -10 3033 289
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## # TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
## METAR WindDir WindSpeed WindGust Dummy Visibility TempC DewC Altimeter SLP
## <chr> <chr> <int> <dbl> <chr> <dbl> <int> <int> <int> <int>
## 1 54Z ~ 160 6 NA " " 10 2 -4 2993 147
## 2 54Z ~ VRB 4 NA " " 10 4 -4 2990 139
## 3 54Z ~ 130 7 NA " " 10 4 -7 2989 134
## 4 54Z ~ 110 7 NA " " 10 4 -7 2988 130
## 5 54Z ~ 100 10 NA " " 10 3 -5 2986 125
## 6 54Z ~ 100 10 NA " " 10 3 -6 2986 123
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## # TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
## METAR WindDir WindSpeed WindGust Dummy Visibility TempC DewC Altimeter SLP
## <chr> <chr> <int> <dbl> <chr> <dbl> <int> <int> <int> <int>
## 1 54Z ~ 360 7 NA " " 10 21 15 2995 130
## 2 54Z ~ 210 13 NA " " 10 31 4 2988 108
## 3 54Z ~ 340 11 NA " " 10 -7 -13 3025 259
## 4 54Z ~ 240 9 NA " " 10 13 8 2961 18
## 5 54Z ~ 310 6 NA " " 10 -7 -19 3044 329
## 6 54Z ~ 040 7 NA " " 10 26 14 3010 179
## 7 54Z ~ 170 17 NA " " 10 31 22 2983 90
## 8 54Z ~ 160 7 NA " " 10 -1 -4 3002 182
## 9 54Z ~ 310 9 NA " " 10 13 9 3021 226
## 10 54Z ~ 110 10 NA " " 10 16 16 2990 116
## 11 54Z ~ 140 5 NA " " 10 19 18 3013 199
## 12 54Z ~ 110 6 NA " " 10 20 4 3012 197
## 13 54Z ~ 220 4 NA " " 10 3 -4 3042 311
## 14 54Z ~ 350 3 NA " " 8 9 8 3011 194
## 15 54Z ~ 080 8 NA " " 10 -12 -18 3046 340
## 16 54Z ~ 320 7 NA " " 10 16 11 2998 144
## 17 54Z ~ 330 6 NA " " 10 9 5 3016 214
## 18 54Z ~ 340 13 NA " " 3 0 -1 2979 99
## 19 54Z ~ 190 5 NA " " 10 3 -4 3024 251
## 20 54Z ~ 320 18 NA " " 10 24 12 2999 147
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## # TempF <dbl>, DewF <dbl>
##
## *** Number of NA values ***
## METAR WindDir WindSpeed WindGust Dummy Visibility TempC
## 23 23 23 8813 23 23 23
## DewC Altimeter SLP FahrC dtime origMETAR TempF
## 23 23 23 23 0 0 23
## DewF
## 23
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
# There are three obvious issues
# Visibility is not correctly picked up when there is a / such as 1/2 SM
# Wind gusts are never picked up
# Sea Level Pressure is missing a digit
# Address the visibility issues
getVisibility <- function(curMet, origMet) {
# Get the original METAR data
metAll <- origMet %>%
pull(metar)
# Correct for visibility
# Areas that have \\d \\d/\\dSM
sm1 <- which(str_detect(metAll, pattern=" \\d/\\dSM"))
sm2 <- which(str_detect(metAll, pattern=" \\d \\d/\\dSM"))
valSM1 <- str_match(metAll, pattern="\\d/\\dSM")[sm1]
valSM1 <- str_replace(valSM1, "SM", "")
valSM1 <- as.integer(str_sub(valSM1, 1, 1)) / as.integer(str_sub(valSM1, 3, 3))
valSM2 <- str_match(metAll, pattern=" \\d \\d/\\dSM")[sm2]
valSM2 <- as.integer(str_sub(valSM2, 2, 2))
curMet[sm1, "Visibility"] <- valSM1
curMet[sm2, "Visibility"] <- curMet[sm2, "Visibility"] + valSM2
curMet %>%
count(Visibility) %>%
print()
curMet
}
parseMETAR <- getVisibility(convMETAR, origMet=funcMETAR)
## # A tibble: 18 x 2
## Visibility n
## <dbl> <int>
## 1 0.25 20
## 2 0.5 16
## 3 0.75 15
## 4 1 19
## 5 1.25 13
## 6 1.5 17
## 7 1.75 21
## 8 2 50
## 9 2.5 38
## 10 3 70
## 11 4 108
## 12 5 108
## 13 6 146
## 14 7 189
## 15 8 221
## 16 9 290
## 17 10 7449
## 18 NA 23
# Correct for wind gusts
getWindGusts <- function(curMet, origMet) {
metAll <- origMet %>%
pull(metar)
gustCheck <- which(str_detect(metAll, pattern="\\d{5}G\\d{2}KT"))
valGust <- str_match(metAll, pattern="\\d{5}G\\d{2}KT")[gustCheck]
valGust <- as.integer(str_sub(valGust, 7, 8))
curMet[gustCheck, "WindGust"] <- valGust
curMet %>%
count(WindGust) %>%
as.data.frame %>%
print()
curMet
}
parseMETAR <- getWindGusts(parseMETAR, origMet=funcMETAR)
## WindGust n
## 1 14 4
## 2 15 11
## 3 16 11
## 4 17 16
## 5 18 29
## 6 19 59
## 7 20 69
## 8 21 87
## 9 22 83
## 10 23 107
## 11 24 101
## 12 25 83
## 13 26 62
## 14 27 61
## 15 28 61
## 16 29 37
## 17 30 28
## 18 31 24
## 19 32 18
## 20 33 13
## 21 34 12
## 22 35 14
## 23 36 6
## 24 37 6
## 25 38 10
## 26 39 11
## 27 40 2
## 28 41 3
## 29 42 1
## 30 43 2
## 31 45 2
## 32 NA 7780
# Correct for SLP
fixSLP <- function(curMet) {
dfParse <- curMet %>%
mutate(modSLP=ifelse(curMet$SLP < 500, 1000 + curMet$SLP/10, 900 + curMet$SLP/10))
p <- dfParse %>%
group_by(SLP, modSLP) %>%
summarize(n=n()) %>%
ggplot(aes(x=SLP, y=modSLP, size=n)) +
geom_point(alpha=0.3)
print(p)
dfParse
}
parseMETAR <- fixSLP(parseMETAR)
## Warning: Removed 1 rows containing missing values (geom_point).
# Check updated plots
plotcountsByMetric(parseMETAR, mets=c("WindGust", "Visibility", "modSLP"))
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
The relationships between METAR variables can also be converted to functional form.
Example code includes:
# Function to calculate, display, and plot variable correlations
corMETAR <- function(met, numVars, subT="") {
# Keep only complete cases and report on data kept
dfUse <- met %>%
select_at(vars(all_of(numVars))) %>%
filter(complete.cases(.))
nU <- nrow(dfUse)
nM <- nrow(met)
myPct <- round(100*nU/nM, 1)
cat("\n *** Correlations use ", nU, " complete cases (", myPct, "% of ", nM, " total) ***\n", sep="")
# Create the correlation matrix
mtxCorr <- dfUse %>%
cor()
# Print the correlations
mtxCorr %>%
round(2) %>%
print()
# Display a heat map
corrplot::corrplot(mtxCorr, method="color", title=paste0("Hourly Weather Correlations\n", subT))
}
# Define key numeric variables
coreNum <- c("TempC", "TempF", "DewC", "DewF", "Altimeter", "modSLP", "WindSpeed", "Visibility")
# Run the correlations function
corMETAR(parseMETAR, numVars=coreNum)
##
## *** Correlations use 8790 complete cases (99.7% of 8813 total) ***
## TempC TempF DewC DewF Altimeter modSLP WindSpeed Visibility
## TempC 1.00 1.00 0.91 0.91 -0.38 -0.48 0.13 0.19
## TempF 1.00 1.00 0.91 0.91 -0.38 -0.48 0.13 0.19
## DewC 0.91 0.91 1.00 1.00 -0.38 -0.48 -0.01 0.07
## DewF 0.91 0.91 1.00 1.00 -0.38 -0.48 -0.01 0.07
## Altimeter -0.38 -0.38 -0.38 -0.38 1.00 0.99 -0.26 0.07
## modSLP -0.48 -0.48 -0.48 -0.48 0.99 1.00 -0.25 0.04
## WindSpeed 0.13 0.13 -0.01 -0.01 -0.26 -0.25 1.00 -0.01
## Visibility 0.19 0.19 0.07 0.07 0.07 0.04 -0.01 1.00
# Create a function for plotting two variables against each other
plotNumCor <- function(met, var1, var2, title=NULL, subT="") {
if (is.null(title))
{ title <- paste0("Hourly Correlations of ", var1, " and ", var2) }
p <- met %>%
group_by_at(vars(all_of(c(var1, var2)))) %>%
summarize(n=n()) %>%
ggplot(aes_string(x=var1, y=var2)) +
geom_point(alpha=0.5, aes_string(size="n")) +
geom_smooth(method="lm", aes_string(weight="n")) +
labs(x=var1, y=var2, title=title, subtitle=subT)
print(p)
}
var1List <- c("TempC", "DewC", "Altimeter", "TempF", "TempF", "TempF", "Altimeter")
var2List <- c("TempF", "DewF", "modSLP", "DewF", "Altimeter", "modSLP", "WindSpeed")
for (n in 1:length(var1List)) {
plotNumCor(parseMETAR, var1List[n], var2List[n])
}
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
# Function for linear regressions on METAR data
lmMETAR <- function(met, y, x, yName, subT="Lincoln, NE (2016) Hourly METAR") {
# Convert to formula
myChar <- paste0(y, " ~ ", x)
cat("\n *** Regression call is:", myChar, "***\n")
# Run regression
regr <- lm(formula(myChar), data=met)
# Summarize regression
print(summary(regr))
# Predict the new values
pred <- predict(regr, newdata=met)
# Plot the predictions
p <- met %>%
select_at(vars(all_of(y))) %>%
mutate(pred=pred) %>%
group_by_at(vars(all_of(c(y, "pred")))) %>%
summarize(n=n()) %>%
ggplot(aes_string(x=y, y="pred")) +
geom_point(aes(size=n), alpha=0.25) +
geom_smooth(aes(weight=n), method="lm") +
labs(title=paste0("Predicted vs. Actual ", yName, " - ", x, " as Predictor"),
subtitle=subT, x=paste0("Actual ", yName), y=paste0("Predicted ", yName)
)
print(p)
}
lmMETAR(parseMETAR, "modSLP", "Altimeter", yName="Sea Level Pressure")
##
## *** Regression call is: modSLP ~ Altimeter ***
##
## Call:
## lm(formula = formula(myChar), data = met)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8890 -0.8172 -0.1578 0.7610 2.7890
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.233e+01 1.406e+00 -44.34 <2e-16 ***
## Altimeter 3.594e-01 4.683e-04 767.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9691 on 8788 degrees of freedom
## (23 observations deleted due to missingness)
## Multiple R-squared: 0.9853, Adjusted R-squared: 0.9853
## F-statistic: 5.889e+05 on 1 and 8788 DF, p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
lmMETAR(parseMETAR, "modSLP", "Altimeter + TempF", yName="Sea Level Pressure")
##
## *** Regression call is: modSLP ~ Altimeter + TempF ***
##
## Call:
## lm(formula = formula(myChar), data = met)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.24130 -0.26737 0.00686 0.25214 1.23326
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.002e+01 5.615e-01 -17.84 <2e-16 ***
## Altimeter 3.428e-01 1.857e-04 1845.63 <2e-16 ***
## TempF -4.559e-02 1.921e-04 -237.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3561 on 8787 degrees of freedom
## (23 observations deleted due to missingness)
## Multiple R-squared: 0.998, Adjusted R-squared: 0.998
## F-statistic: 2.209e+06 on 2 and 8787 DF, p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
Cloud data can also be extracted using the functional form.
Example code includes:
extractClouds <- function(met, metVar, subT="Lincoln, NE (2016) Hourly METAR") {
metAll <- met %>%
pull(metVar)
# Extract the CLR records
mtxCLR <- str_extract_all(metAll, pattern=" CLR ", simplify=TRUE)
if (dim(mtxCLR)[[2]] != 1) { stop("Extracted 2+ CLR from some METAR; investigate") }
isCLR <- ifelse(mtxCLR[, 1] == "", 0, 1)
# Extract the VV records
mtxVV <- str_extract_all(metAll, pattern="VV(\\d{3})", simplify=TRUE)
if (dim(mtxVV)[[2]] != 1) { stop("Extracted 2+ VV from some METAR; investigate") }
isVV <- ifelse(mtxVV[, 1] == "", 0, 1)
htVV <- ifelse(mtxVV[, 1] == "", NA, as.integer(str_replace(mtxVV[, 1], "VV", ""))*100)
# Extract the FEW records
mtxFEW <- str_extract_all(metAll, pattern="FEW(\\d{3})", simplify=TRUE)
numFEW <- apply(mtxFEW, 1, FUN=function(x) { sum((x!=""))} )
# Extract the SCT records
mtxSCT <- str_extract_all(metAll, pattern="SCT(\\d{3})", simplify=TRUE)
numSCT <- apply(mtxSCT, 1, FUN=function(x) { sum((x!=""))} )
# Extract the BKN records
mtxBKN <- str_extract_all(metAll, pattern="BKN(\\d{3})", simplify=TRUE)
numBKN <- apply(mtxBKN, 1, FUN=function(x) { sum((x!=""))} )
# Extract the OVC records
mtxOVC <- str_extract_all(metAll, pattern="OVC(\\d{3})", simplify=TRUE)
numOVC <- apply(mtxOVC, 1, FUN=function(x) { sum((x!=""))} )
# Summarize as a data frame
tblClouds <- tibble::tibble(isCLR=isCLR, isVV=isVV, htVV=htVV, numFEW=numFEW,
numSCT=numSCT, numBKN=numBKN, numOVC=numOVC
)
# Get the counts
cat("\n*** Counts by number of layers of each cloud type ***\n")
tblClouds %>%
count(isCLR, isVV, numFEW, numSCT, numBKN, numOVC) %>%
as.data.frame() %>%
print()
# Investigate the problem data
cat("\n*** METAR records where no clouds were extracted ***\n")
metAll[rowSums(tblClouds, na.rm=TRUE)==0] %>%
print()
# Plot the counts of most obscuration
p <- tblClouds %>%
filter(rowSums(., na.rm=TRUE) > 0) %>%
mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC",
numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW",
TRUE ~ "Error"
), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
)
) %>%
ggplot(aes(x=wType, y=..count../sum(..count..))) +
geom_bar() +
labs(title="Highest Obscuration by Cloud", subtitle=subT,
x="Cloud Type", y="Proportion of Hourly Measurements"
)
print(p)
# Integrate the clouds data
mtxCloud <- cbind(mtxVV, mtxOVC, mtxBKN, mtxSCT, mtxFEW, mtxCLR)
cat("\n*** Dimensions for the cloud matrix ***\n")
print(dim(mtxCloud))
list(tblClouds=tblClouds, mtxCloud=mtxCloud)
}
# Run the initial cloud extraction
initClouds <- extractClouds(parseMETAR, metVar="origMETAR")
##
## *** Counts by number of layers of each cloud type ***
## isCLR isVV numFEW numSCT numBKN numOVC n
## 1 0 0 0 0 0 0 6
## 2 0 0 0 0 0 1 1389
## 3 0 0 0 0 1 0 307
## 4 0 0 0 0 1 1 250
## 5 0 0 0 0 2 0 50
## 6 0 0 0 0 2 1 45
## 7 0 0 0 0 3 0 8
## 8 0 0 0 1 0 0 230
## 9 0 0 0 1 0 1 73
## 10 0 0 0 1 1 0 52
## 11 0 0 0 1 1 1 42
## 12 0 0 0 1 2 0 17
## 13 0 0 0 2 0 0 16
## 14 0 0 0 2 0 1 9
## 15 0 0 0 2 1 0 6
## 16 0 0 1 0 0 0 380
## 17 0 0 1 0 0 1 90
## 18 0 0 1 0 1 0 46
## 19 0 0 1 0 1 1 62
## 20 0 0 1 0 2 0 9
## 21 0 0 1 1 0 0 39
## 22 0 0 1 1 0 1 24
## 23 0 0 1 1 1 0 28
## 24 0 0 1 2 0 0 10
## 25 0 0 2 0 0 0 24
## 26 0 0 2 0 0 1 6
## 27 0 0 2 0 1 0 5
## 28 0 0 2 1 0 0 3
## 29 0 0 3 0 0 0 2
## 30 0 1 0 0 0 0 33
## 31 1 0 0 0 0 0 5552
##
## *** METAR records where no clouds were extracted ***
## [1] "KLNK 301854Z 17011KT 10SM 30/18 A2993 RMK AO2 SLP124 T03000178 $"
## [2] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"
## [3] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"
## [4] "KLNK 161554Z 09009KT 10SM 26/19 A3007 RMK AO2 SLP171 T02610194 $"
## [5] "KLNK 211954Z 25008KT 210V270 10SM 27/10 A3005 RMK AO2 SLP170 T02720100 $"
## [6] "KLNK 050854Z 00000KT 1/2SM R36/3000VP6000FT FG 09/09 A2978 RMK AO2 SLP081 T00940094 51008 $"
##
## *** Dimensions for the cloud matrix ***
## [1] 8813 11
str(initClouds)
## List of 2
## $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 7 variables:
## ..$ isCLR : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
## ..$ isVV : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ htVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ numFEW: int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
## ..$ numSCT: int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
## ..$ numBKN: int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ numOVC: int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
## $ mtxCloud : chr [1:8813, 1:11] "" "" "" "" ...
# Cycle through to find levels of a given type
ckClouds <- function(cloudType, mtx) {
isKey <- which(apply(mtx, 2, FUN=function(x) {sum(str_detect(x, cloudType))}) > 0)
as.integer(str_replace(mtx[, min(isKey)], cloudType, "")) * 100
}
# Function to create the lowest cloud levels
findLowestClouds <- function(mtxCloud, subT="Lincoln, NE (2016) Hourly METAR") {
# Find the lowest clouds by cloud type
lowOVC <- ckClouds("OVC", mtx=mtxCloud)
lowVV <- ckClouds("VV", mtx=mtxCloud)
lowBKN <- ckClouds("BKN", mtx=mtxCloud)
lowSCT <- ckClouds("SCT", mtx=mtxCloud)
lowFEW <- ckClouds("FEW", mtx=mtxCloud)
# Integrate the lowest cloud type by level
lowCloud <- tibble::tibble(lowVV, lowOVC, lowBKN, lowSCT, lowFEW)
cat("\n*** Lowest clouds by type tibble ***\n")
print(lowCloud)
# Get the lowest cloud level
minCloud <- lowCloud
minCloud[is.na(minCloud)] <- 999999
minCloudLevel <- apply(minCloud, 1, FUN=min)
minCeilingLevel <- apply(minCloud[, c("lowVV", "lowOVC", "lowBKN")], 1, FUN=min)
noCloudPct <- mean(minCloudLevel == 999999)
noCeilingPct <- mean(minCeilingLevel == 999999)
# Plot the minimum cloud level (where it exists)
p <- data.frame(minCloudLevel, minCeilingLevel) %>%
filter(minCloudLevel != 999999) %>%
ggplot(aes(x=minCloudLevel)) +
geom_bar(aes(y=..count../sum(..count..))) +
geom_text(aes(x=2500, y=0.04,
label=paste0(round(100*noCloudPct), "% of obs. have no clouds")
)
) +
labs(x="Height [ft]", y="Proportion",
title="Minimum Cloud Height (when some clouds exist)", subtitle=subT
)
print(p)
# Plot the minimum ceiling level (where it exists)
p <- data.frame(minCloudLevel, minCeilingLevel) %>%
filter(minCeilingLevel != 999999) %>%
ggplot(aes(x=minCeilingLevel)) +
geom_bar(aes(y=..count../sum(..count..))) +
geom_text(aes(x=2500, y=0.04,
label=paste0(round(100*noCeilingPct), "% of obs. have no ceiling")
)
) +
labs(x="Height [ft]", y="Proportion",
title="Minimum Ceiling Height (when a ceiling exists)", subtitle=subT
)
print(p)
list(lowCloud=lowCloud, minCeilingLevel=minCeilingLevel, minCloudLevel=minCloudLevel)
}
processedClouds <-findLowestClouds(initClouds$mtxCloud)
##
## *** Lowest clouds by type tibble ***
## # A tibble: 8,813 x 5
## lowVV lowOVC lowBKN lowSCT lowFEW
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NA 2800 NA NA NA
## 2 NA 2700 NA NA NA
## 3 NA 2600 NA NA NA
## 4 NA 2700 NA NA NA
## 5 NA 2700 NA 2100 NA
## 6 NA 2700 NA NA NA
## 7 NA 2700 NA NA NA
## 8 NA 2700 NA NA NA
## 9 NA NA NA NA 2600
## 10 NA NA NA NA NA
## # ... with 8,803 more rows
str(processedClouds)
## List of 3
## $ lowCloud :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 5 variables:
## ..$ lowVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ lowOVC: num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
## ..$ lowBKN: num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ lowSCT: num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
## ..$ lowFEW: num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
## $ minCeilingLevel: num [1:8813] 2800 2700 2600 2700 2700 ...
## $ minCloudLevel : num [1:8813] 2800 2700 2600 2700 2100 ...
The month of the year is an interesting data point for plotting against.
Example code includes:
# Function to bind the existing parsed METAR data with the cloud data
bindMETAR <- function(dfParse, tblClouds, lowCloud) {
# Integrate the cloud data and convert month to a factor
dfFull <- cbind(dfParse, tblClouds, lowCloud) %>%
mutate(wType=factor(case_when(isCLR==1 ~ "CLR", isVV==1 ~ "VV", numOVC > 0 ~ "OVC",
numBKN > 0 ~ "BKN", numSCT > 0 ~ "SCT", numFEW > 0 ~ "FEW",
TRUE ~ "Error"
), levels=c("VV", "OVC", "BKN", "SCT", "FEW", "CLR", "Error")
),
month=factor(lubridate::month(dtime), levels=1:12, labels=month.abb)
)
dfFull <- tibble::as_tibble(dfFull)
str(dfFull)
dfFull
}
fullMETAR <- bindMETAR(dfParse=parseMETAR,
tblClouds=initClouds$tblClouds,
lowCloud=processedClouds$lowCloud
)
## Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 30 variables:
## $ METAR : chr "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## $ WindDir : chr "300" "000" "000" "280" ...
## $ WindSpeed : int 5 0 0 3 5 9 0 3 0 0 ...
## $ WindGust : num NA NA NA NA NA NA NA NA NA NA ...
## $ Dummy : chr " " " " " " " " ...
## $ Visibility: num 10 10 10 10 10 10 10 10 10 10 ...
## $ TempC : int -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
## $ DewC : int -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
## $ Altimeter : int 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
## $ SLP : int 275 277 277 281 286 289 290 291 295 295 ...
## $ FahrC : chr "T10281067" "T10331067" "T10281067" "T10281061" ...
## $ dtime : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
## $ origMETAR : chr "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## $ TempF : num 27 26.1 27 27 27 ...
## $ DewF : num 19.9 19.9 19.9 21 19.9 ...
## $ modSLP : num 1028 1028 1028 1028 1029 ...
## $ isCLR : num 0 0 0 0 0 0 0 0 0 1 ...
## $ isVV : num 0 0 0 0 0 0 0 0 0 0 ...
## $ htVV : num NA NA NA NA NA NA NA NA NA NA ...
## $ numFEW : int 0 0 0 0 0 0 0 0 1 0 ...
## $ numSCT : int 0 0 0 0 1 0 0 0 0 0 ...
## $ numBKN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ numOVC : int 1 1 1 1 1 1 1 1 0 0 ...
## $ lowVV : num NA NA NA NA NA NA NA NA NA NA ...
## $ lowOVC : num 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
## $ lowBKN : num NA NA NA NA NA NA NA NA NA NA ...
## $ lowSCT : num NA NA NA NA 2100 NA NA NA NA NA ...
## $ lowFEW : num NA NA NA NA NA NA NA NA 2600 NA ...
## $ wType : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
## $ month : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
# Updated function for plotting numeric by factor
plotFactorNumeric <- function(met, fctVar, numVar, title=NULL, subT) {
if (is.null(title)) { title <- paste0("Hourly Weather - ", numVar, " vs. ", fctVar) }
p <- met %>%
filter(!is.na(get(fctVar)), !is.na(get(numVar))) %>%
ggplot(aes_string(x=fctVar, y=numVar)) +
geom_boxplot(fill="lightblue") +
labs(title=title, subtitle=subT)
print(p)
}
# Function for creating cloud plots
makeFactorPlots <- function(met,
fctVar=c("month", "wType"),
keyVar=c("WindSpeed", "Visibility", "Altimeter", "TempF", "DewF"),
desc="Lincoln, NE (2016) Hourly METAR"
) {
# Run for all of the key variables against wind speed and cloud type
for (varF in fctVar) {
for (varK in keyVar) {
plotFactorNumeric(met, fctVar=varF, numVar=varK, subT=desc)
}
}
# Create stacked bars for cloud type by month
# dfFull %>%
# filter(!is.na(wType), wType!="Error") %>%
# ggplot(aes(x=month, fill=wType)) +
# geom_bar(position="fill") +
# labs(title="Lincoln, NE (2016)", x="", y="Proportion of Month")
}
makeFactorPlots(fullMETAR)
The functions can be combined in to a single routine for reading, parsing, and running EDA on METAR data..
Example code includes:
# Function to run the full process
runAllMETAR <- function(fname, timeZ, expMin, expDays, locMET, shortMET, longMET, valMet,
labsMet=c("METAR", "WindDir", "WindSpeed", "WindGust", "Dummy", "Visibility",
"TempC", "DewC", "Altimeter", "SLP", "FahrC"
),
keyMetric=c("WindDir", "WindSpeed", "WindGust", "Visibility", "TempC",
"DewC", "Altimeter", "SLP", "TempF", "DewF"
),
coreNum=c("TempC", "TempF", "DewC", "DewF",
"Altimeter", "modSLP", "WindSpeed", "Visibility"
),
var1List=c("TempC", "DewC", "Altimeter", "TempF", "TempF", "TempF", "Altimeter"),
var2List=c("TempF", "DewF", "modSLP", "DewF", "Altimeter", "modSLP", "WindSpeed")
) {
# Read in the METAR data
funcMETAR <- readMETAR(fileName=fname, timeZ=timeZ, expMin=expMin, expDays=expDays)
# funcMETAR
# Extract wind data from METAR
windMETAR <- extractWind(funcMETAR)
# windMETAR
# Run basic wind plots
basicWindPlots(windMETAR, desc=locMET, gran=shortMET)
# Run the METAR parsing on the raw data
initMETAR <- initialParseMETAR(funcMETAR, val=valMet, labs=labsMet)
# initMETAR
# Parse and convert the METAR data
convMETAR <- convertMETAR(initMETAR, metrics=keyMetric)
# convMETAR
# Fix problems with visibility, wind gusts, and SLP
parseMETAR <- getVisibility(convMETAR, origMet=funcMETAR)
parseMETAR <- getWindGusts(parseMETAR, origMet=funcMETAR)
parseMETAR <- fixSLP(parseMETAR)
# Check updated plots
plotcountsByMetric(parseMETAR, mets=c("WindGust", "Visibility", "modSLP"))
# Run the correlations function
corMETAR(parseMETAR, numVars=coreNum, subT=longMET)
# Plot correlations
for (n in 1:length(var1List)) {
plotNumCor(parseMETAR, var1List[n], var2List[n], subT=longMET)
}
# Run lm models for SLP vs Altimeter and (optionally) Temperature
lmMETAR(parseMETAR, "modSLP", "Altimeter", yName="Sea Level Pressure", subT=longMET)
lmMETAR(parseMETAR, "modSLP", "Altimeter + TempF", yName="Sea Level Pressure", subT=longMET)
# Run the initial cloud extraction
initClouds <- extractClouds(parseMETAR, metVar="origMETAR", subT=longMET)
str(initClouds)
# Find the lowest cloud levels and lowest ceilings
processedClouds <-findLowestClouds(initClouds$mtxCloud, subT=longMET)
str(processedClouds)
# Bind the processed METAR and the cloud data
fullMETAR <- bindMETAR(dfParse=parseMETAR,
tblClouds=initClouds$tblClouds,
lowCloud=processedClouds$lowCloud
)
# Create box plots for key weather elements against month and cloud type
makeFactorPlots(fullMETAR, desc=longMET)
# Return all of the elements
list(fullMETAR=fullMETAR, funcMETAR=funcMETAR, windMETAR=windMETAR,
initMETAR=initMETAR, convMETAR=convMETAR, parseMETAR=parseMETAR,
initClouds=initClouds, processedClouds=processedClouds
)
}
# Set key parameters for reading and interpreting METAR
fname <- "./RInputFiles/metar_klnk_2016.txt" # file name for raw METAR data
timeZ <- "54Z" # Zulu time that METAR is recorded at this station
expMin <- as.POSIXct("2015-12-31 00:54:00", tz="UTC") # Expected first time read
expDays <- 368 # Expected total days read
locMET <- "Lincoln, NE" # Description of city or location
shortMET <- "KLNK METAR (2016)" # Station code and timing
longMET <- "Lincoln, NE Hourly METAR (2016)" # Description of city or location and timing
# Extraction format for METAR - paste the expected Zulu time at the front
valMet <- paste0(timeZ, ".*?(VRB|\\d{3})(\\d{2})(G\\d{2})?KT(.*?)(\\d{1,2}SM).*?\\s(M?\\d{2})/(M?\\d{2}).*?(A\\d{4}).*?RMK.*?(SLP\\d{3}).*?(T\\d{8})")
# Run the process for Lincoln, NE
klnk2016METAR <- runAllMETAR(fname=fname, timeZ=timeZ, expMin=expMin, expDays=expDays,
locMET=locMET, shortMET=shortMET, longMET=longMET, valMet=valMet
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## station = col_character(),
## valid = col_datetime(format = ""),
## p01i = col_character(),
## skyc1 = col_character(),
## skyc2 = col_character(),
## skyc3 = col_character(),
## skyc4 = col_logical(),
## skyl4 = col_logical(),
## wxcodes = col_character(),
## ice_accretion_1hr = col_character(),
## ice_accretion_3hr = col_character(),
## ice_accretion_6hr = col_character(),
## peak_wind_time = col_datetime(format = ""),
## metar = col_character()
## )
## See spec(...) for full column specifications.
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 10594 obs. of 29 variables:
## $ station : chr "LNK" "LNK" "LNK" "LNK" ...
## $ valid : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
## $ tmpf : num 27 26.1 27 27 27 ...
## $ dwpf : num 19.9 19.9 19.9 21 19.9 ...
## $ relh : num 74.5 77.3 74.5 78 74.5 ...
## $ drct : num 300 0 0 280 310 10 0 10 20 0 ...
## $ sknt : num 5 0 0 3 5 9 0 3 3 0 ...
## $ p01i : chr "0.00" "0.00" "0.00" "0.00" ...
## $ alti : num 30.3 30.3 30.3 30.3 30.3 ...
## $ mslp : num 1028 1028 1028 1028 1029 ...
## $ vsby : num 10 10 10 10 10 10 10 10 10 10 ...
## $ gust : num NA NA NA NA NA NA NA NA NA NA ...
## $ skyc1 : chr "OVC" "OVC" "OVC" "OVC" ...
## $ skyc2 : chr NA NA NA NA ...
## $ skyc3 : chr NA NA NA NA ...
## $ skyc4 : logi NA NA NA NA NA NA ...
## $ skyl1 : num 2800 2700 2600 2700 2100 2700 2700 2700 2600 2600 ...
## $ skyl2 : num NA NA NA NA 2700 NA NA NA NA NA ...
## $ skyl3 : num NA NA NA NA NA NA NA NA NA NA ...
## $ skyl4 : logi NA NA NA NA NA NA ...
## $ wxcodes : chr NA NA NA NA ...
## $ ice_accretion_1hr: chr NA NA NA NA ...
## $ ice_accretion_3hr: chr NA NA NA NA ...
## $ ice_accretion_6hr: chr NA NA NA NA ...
## $ peak_wind_gust : num NA NA NA NA NA NA NA NA NA NA ...
## $ peak_wind_drct : num NA NA NA NA NA NA NA NA NA NA ...
## $ peak_wind_time : POSIXct, format: NA NA ...
## $ feel : num 20.4 26.1 27 22.9 20.4 ...
## $ metar : chr "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
##
## *** OBSERVATIONS EXPECTED BUT NOT RECORDED ***
## [1] "2016-01-19 11:54:00 UTC" "2016-05-06 11:54:00 UTC"
## [3] "2016-05-06 12:54:00 UTC" "2016-06-17 23:54:00 UTC"
## [5] "2016-06-18 00:54:00 UTC" "2016-06-18 07:54:00 UTC"
## [7] "2016-07-02 15:54:00 UTC" "2016-07-13 14:54:00 UTC"
## [9] "2016-07-13 15:54:00 UTC" "2016-07-13 16:54:00 UTC"
## [11] "2016-07-13 17:54:00 UTC" "2016-07-30 13:54:00 UTC"
## [13] "2016-08-02 07:54:00 UTC" "2016-08-05 07:54:00 UTC"
## [15] "2016-08-29 21:54:00 UTC" "2016-09-15 16:54:00 UTC"
## [17] "2016-09-16 05:54:00 UTC" "2016-11-21 00:54:00 UTC"
## [19] "2016-12-03 08:54:00 UTC"
##
## *** OBSERVATIONS RECORDED BUT NOT EXPECTED ***
## POSIXct of length 0
##
## *** Are the extracted records unique? ***
## [1] TRUE
##
##
## *** First 6 winds and parsing ***
## [,1] [,2] [,3] [,4]
## [1,] "30005KT" "300" "05" NA
## [2,] "00000KT" "000" "00" NA
## [3,] "00000KT" "000" "00" NA
## [4,] "28003KT" "280" "03" NA
## [5,] "31005KT" "310" "05" NA
## [6,] "01009KT" "010" "09" NA
##
## *** Table of WIND DIRECTION ***
##
## 000 010 020 030 040 050 060 070 080 090 100 110 120 130 140 150
## 875 269 199 146 135 121 108 95 88 65 102 158 169 245 241 339
## 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 310
## 463 565 517 413 284 179 142 96 73 80 105 89 121 141 147 225
## 320 330 340 350 360 VRB <NA>
## 234 303 352 413 383 114 19
##
## *** Table of WIND SPEED ***
##
## 00 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17
## 875 615 704 664 696 651 636 599 536 451 439 371 315 276 235 173
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 34
## 156 108 69 62 45 33 23 13 14 9 10 6 6 2 1 1
## <NA>
## 19
##
## *** Table of WIND GUST ***
##
## G14 G15 G16 G17 G18 G19 G20 G21 G22 G23 G24 G25 G26 G27 G28 G29
## 6 11 11 16 30 59 69 87 83 107 101 83 62 61 61 37
## G30 G31 G32 G33 G34 G35 G36 G37 G38 G39 G40 G41 G42 G43 G45 <NA>
## 28 24 18 13 12 14 6 6 10 11 2 3 1 2 2 7777
##
## *** WIND DATA WAS NOT CAPTURED FROM: ***
## # A tibble: 19 x 1
## metar
## <chr>
## 1 KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028
## 2 KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083
## 3 KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013
## 4 KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006~
## 5 KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $
## 6 KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007
## 7 KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58~
## 8 KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $
## 9 KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106
## 10 KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200
## 11 KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $
## 12 KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222
## 13 KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001
## 14 KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010
## 15 KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183
## 16 KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100
## 17 KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $
## 18 KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067
## 19 KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050
## Warning: Removed 19 rows containing non-finite values (stat_count).
## Warning: Removed 4 rows containing missing values (geom_point).
##
## *** Tentative Summary of Element Parsing ***
## .
## FALSE TRUE
## 23 8790
##
## *** Data Not Matched ***
## [1] "KLNK 291354Z 10SM CLR M01/M03 A2976 RMK AO2 SLP088 T10061028"
## [2] "KLNK 012154Z 10SM CLR 01/M08 A3018 RMK AO2 SLP234 T00061083"
## [3] "KLNK 201754Z 10SM CLR 06/M07 A3041 RMK AO2 SLP308 T00611067 10061 21039 58013"
## [4] "KLNK 221654Z 19007KT CLR 15/03 A2956 RMK AO2 SLP006 T01500033 $"
## [5] "KLNK 221754Z 10SM CLR 16/03 A2955 RMK AO2 SLP000 T01610033 10161 20106 58006 $"
## [6] "KLNK 221854Z 10SM CLR 17/03 A2954 RMK AO2 SLP000 T01670033 $"
## [7] "KLNK 050254Z 10SM CLR 14/03 A3004 RMK AO2 SLP169 T01390033 53007"
## [8] "KLNK 181754Z 10SM OVC075 21/04 A3023 RMK AO2 SLP234 T02110039 10217 20072 58011"
## [9] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"
## [10] "KLNK 011554Z 32007KT CLR 22/09 A3008 RMK AO2 SLP179 T02170089 $"
## [11] "KLNK 152254Z 10SM CLR 38/11 A2978 RMK AO2 SLP070 T03780106"
## [12] "KLNK 181954Z 10SM SCT045 31/20 A3017 RMK AO2 SLP205 T03060200"
## [13] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"
## [14] "KLNK 261654Z 10SM CLR 29/18 A3014 RMK AO2 SLP192 T02890183 $"
## [15] "KLNK 261754Z 10SM CLR 31/18 A3012 RMK AO2 SLP188 T03060178 10306 20222"
## [16] "KLNK 271754Z 10SM CLR 33/18 A3015 RMK AO2 SLP198 T03330183 10333 20222 55001"
## [17] "KLNK 251754Z 10SM CLR 29/17 A3013 RMK AO2 SLP192 T02890167 10289 20194 58010"
## [18] "KLNK 261854Z 10SM CLR 30/18 A3006 RMK AO2 SLP166 T03000183"
## [19] "KLNK 211854Z 10SM CLR 27/10 A3007 RMK AO2 SLP174 T02670100"
## [20] "KLNK 201454Z 10SM CLR 24/21 A3008 RMK AO2 SLP178 T02390206 56004 $"
## [21] "KLNK 201554Z 15014G19KT CLR 27/22 A3007 RMK AO2 SLP174 T02670217 $"
## [22] "KLNK 051854Z 10SM CLR 22/07 A3027 RMK AO2 SLP244 T02170067"
## [23] "KLNK 170754Z AUTO 10SM CLR 08/05 A2950 RMK AO2 SLP984 T00830050"
##
## *** Parsing matrix summary ***
## [1] 8813 11
## [,1]
## [1,] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067"
## [2,] "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067"
## [3,] "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067"
## [4,] "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061"
## [5,] "54Z 31005KT 10SM SCT021 OVC027 M03/M07 A3033 RMK AO2 SLP286 T10281067"
## [6,] "54Z AUTO 01009KT 10SM OVC027 M06/M10 A3033 RMK AO2 SLP289 T10611100"
## [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11]
## [1,] "300" "05" NA " " "10SM" "M03" "M07" "A3029" "SLP275" "T10281067"
## [2,] "000" "00" NA " " "10SM" "M03" "M07" "A3030" "SLP277" "T10331067"
## [3,] "000" "00" NA " " "10SM" "M03" "M07" "A3030" "SLP277" "T10281067"
## [4,] "280" "03" NA " " "10SM" "M03" "M06" "A3031" "SLP281" "T10281061"
## [5,] "310" "05" NA " " "10SM" "M03" "M07" "A3033" "SLP286" "T10281067"
## [6,] "010" "09" NA " " "10SM" "M06" "M10" "A3033" "SLP289" "T10611100"
##
## *** Summary of the parsed data ***
## Observations: 8,813
## Variables: 13
## $ METAR <chr> "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T1...
## $ WindDir <chr> "300", "000", "000", "280", "310", "010", "000", "010", ...
## $ WindSpeed <chr> "05", "00", "00", "03", "05", "09", "00", "03", "00", "0...
## $ WindGust <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Dummy <chr> " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", "...
## $ Visibility <chr> "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", "10SM", ...
## $ TempC <chr> "M03", "M03", "M03", "M03", "M03", "M06", "M07", "M08", ...
## $ DewC <chr> "M07", "M07", "M07", "M06", "M07", "M10", "M11", "M11", ...
## $ Altimeter <chr> "A3029", "A3030", "A3030", "A3031", "A3033", "A3033", "A...
## $ SLP <chr> "SLP275", "SLP277", "SLP277", "SLP281", "SLP286", "SLP28...
## $ FahrC <chr> "T10281067", "T10331067", "T10281067", "T10281061", "T10...
## $ dtime <dttm> 2015-12-31 00:54:00, 2015-12-31 01:54:00, 2015-12-31 02...
## $ origMETAR <chr> "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 ...
## Warning: NAs introduced by coercion
##
## *** Parsed data structure, head, tail, and random sample ***
## Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 15 variables:
## $ METAR : chr "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## $ WindDir : chr "300" "000" "000" "280" ...
## $ WindSpeed : int 5 0 0 3 5 9 0 3 0 0 ...
## $ WindGust : num NA NA NA NA NA NA NA NA NA NA ...
## $ Dummy : chr " " " " " " " " ...
## $ Visibility: num 10 10 10 10 10 10 10 10 10 10 ...
## $ TempC : int -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
## $ DewC : int -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
## $ Altimeter : int 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
## $ SLP : int 275 277 277 281 286 289 290 291 295 295 ...
## $ FahrC : chr "T10281067" "T10331067" "T10281067" "T10281061" ...
## $ dtime : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
## $ origMETAR : chr "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## $ TempF : num 27 26.1 27 27 27 ...
## $ DewF : num 19.9 19.9 19.9 21 19.9 ...
## # A tibble: 6 x 15
## METAR WindDir WindSpeed WindGust Dummy Visibility TempC DewC Altimeter SLP
## <chr> <chr> <int> <dbl> <chr> <dbl> <int> <int> <int> <int>
## 1 54Z ~ 300 5 NA " " 10 -3 -7 3029 275
## 2 54Z ~ 000 0 NA " " 10 -3 -7 3030 277
## 3 54Z ~ 000 0 NA " " 10 -3 -7 3030 277
## 4 54Z ~ 280 3 NA " " 10 -3 -6 3031 281
## 5 54Z ~ 310 5 NA " " 10 -3 -7 3033 286
## 6 54Z ~ 010 9 NA " " 10 -6 -10 3033 289
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## # TempF <dbl>, DewF <dbl>
## # A tibble: 6 x 15
## METAR WindDir WindSpeed WindGust Dummy Visibility TempC DewC Altimeter SLP
## <chr> <chr> <int> <dbl> <chr> <dbl> <int> <int> <int> <int>
## 1 54Z ~ 160 6 NA " " 10 2 -4 2993 147
## 2 54Z ~ VRB 4 NA " " 10 4 -4 2990 139
## 3 54Z ~ 130 7 NA " " 10 4 -7 2989 134
## 4 54Z ~ 110 7 NA " " 10 4 -7 2988 130
## 5 54Z ~ 100 10 NA " " 10 3 -5 2986 125
## 6 54Z ~ 100 10 NA " " 10 3 -6 2986 123
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## # TempF <dbl>, DewF <dbl>
## # A tibble: 20 x 15
## METAR WindDir WindSpeed WindGust Dummy Visibility TempC DewC Altimeter SLP
## <chr> <chr> <int> <dbl> <chr> <dbl> <int> <int> <int> <int>
## 1 54Z ~ 360 7 NA " " 10 21 15 2995 130
## 2 54Z ~ 210 13 NA " " 10 31 4 2988 108
## 3 54Z ~ 340 11 NA " " 10 -7 -13 3025 259
## 4 54Z ~ 240 9 NA " " 10 13 8 2961 18
## 5 54Z ~ 310 6 NA " " 10 -7 -19 3044 329
## 6 54Z ~ 040 7 NA " " 10 26 14 3010 179
## 7 54Z ~ 170 17 NA " " 10 31 22 2983 90
## 8 54Z ~ 160 7 NA " " 10 -1 -4 3002 182
## 9 54Z ~ 310 9 NA " " 10 13 9 3021 226
## 10 54Z ~ 110 10 NA " " 10 16 16 2990 116
## 11 54Z ~ 140 5 NA " " 10 19 18 3013 199
## 12 54Z ~ 110 6 NA " " 10 20 4 3012 197
## 13 54Z ~ 220 4 NA " " 10 3 -4 3042 311
## 14 54Z ~ 350 3 NA " " 8 9 8 3011 194
## 15 54Z ~ 080 8 NA " " 10 -12 -18 3046 340
## 16 54Z ~ 320 7 NA " " 10 16 11 2998 144
## 17 54Z ~ 330 6 NA " " 10 9 5 3016 214
## 18 54Z ~ 340 13 NA " " 3 0 -1 2979 99
## 19 54Z ~ 190 5 NA " " 10 3 -4 3024 251
## 20 54Z ~ 320 18 NA " " 10 24 12 2999 147
## # ... with 5 more variables: FahrC <chr>, dtime <dttm>, origMETAR <chr>,
## # TempF <dbl>, DewF <dbl>
##
## *** Number of NA values ***
## METAR WindDir WindSpeed WindGust Dummy Visibility TempC
## 23 23 23 8813 23 23 23
## DewC Altimeter SLP FahrC dtime origMETAR TempF
## 23 23 23 23 0 0 23
## DewF
## 23
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning in min(diff(sort(x))): no non-missing arguments to min; returning Inf
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## # A tibble: 18 x 2
## Visibility n
## <dbl> <int>
## 1 0.25 20
## 2 0.5 16
## 3 0.75 15
## 4 1 19
## 5 1.25 13
## 6 1.5 17
## 7 1.75 21
## 8 2 50
## 9 2.5 38
## 10 3 70
## 11 4 108
## 12 5 108
## 13 6 146
## 14 7 189
## 15 8 221
## 16 9 290
## 17 10 7449
## 18 NA 23
## WindGust n
## 1 14 4
## 2 15 11
## 3 16 11
## 4 17 16
## 5 18 29
## 6 19 59
## 7 20 69
## 8 21 87
## 9 22 83
## 10 23 107
## 11 24 101
## 12 25 83
## 13 26 62
## 14 27 61
## 15 28 61
## 16 29 37
## 17 30 28
## 18 31 24
## 19 32 18
## 20 33 13
## 21 34 12
## 22 35 14
## 23 36 6
## 24 37 6
## 25 38 10
## 26 39 11
## 27 40 2
## 28 41 3
## 29 42 1
## 30 43 2
## 31 45 2
## 32 NA 7780
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (position_stack).
##
## *** Correlations use 8790 complete cases (99.7% of 8813 total) ***
## TempC TempF DewC DewF Altimeter modSLP WindSpeed Visibility
## TempC 1.00 1.00 0.91 0.91 -0.38 -0.48 0.13 0.19
## TempF 1.00 1.00 0.91 0.91 -0.38 -0.48 0.13 0.19
## DewC 0.91 0.91 1.00 1.00 -0.38 -0.48 -0.01 0.07
## DewF 0.91 0.91 1.00 1.00 -0.38 -0.48 -0.01 0.07
## Altimeter -0.38 -0.38 -0.38 -0.38 1.00 0.99 -0.26 0.07
## modSLP -0.48 -0.48 -0.48 -0.48 0.99 1.00 -0.25 0.04
## WindSpeed 0.13 0.13 -0.01 -0.01 -0.26 -0.25 1.00 -0.01
## Visibility 0.19 0.19 0.07 0.07 0.07 0.04 -0.01 1.00
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
##
## *** Regression call is: modSLP ~ Altimeter ***
##
## Call:
## lm(formula = formula(myChar), data = met)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8890 -0.8172 -0.1578 0.7610 2.7890
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.233e+01 1.406e+00 -44.34 <2e-16 ***
## Altimeter 3.594e-01 4.683e-04 767.38 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9691 on 8788 degrees of freedom
## (23 observations deleted due to missingness)
## Multiple R-squared: 0.9853, Adjusted R-squared: 0.9853
## F-statistic: 5.889e+05 on 1 and 8788 DF, p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
##
## *** Regression call is: modSLP ~ Altimeter + TempF ***
##
## Call:
## lm(formula = formula(myChar), data = met)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.24130 -0.26737 0.00686 0.25214 1.23326
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.002e+01 5.615e-01 -17.84 <2e-16 ***
## Altimeter 3.428e-01 1.857e-04 1845.63 <2e-16 ***
## TempF -4.559e-02 1.921e-04 -237.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3561 on 8787 degrees of freedom
## (23 observations deleted due to missingness)
## Multiple R-squared: 0.998, Adjusted R-squared: 0.998
## F-statistic: 2.209e+06 on 2 and 8787 DF, p-value: < 2.2e-16
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
##
## *** Counts by number of layers of each cloud type ***
## isCLR isVV numFEW numSCT numBKN numOVC n
## 1 0 0 0 0 0 0 6
## 2 0 0 0 0 0 1 1389
## 3 0 0 0 0 1 0 307
## 4 0 0 0 0 1 1 250
## 5 0 0 0 0 2 0 50
## 6 0 0 0 0 2 1 45
## 7 0 0 0 0 3 0 8
## 8 0 0 0 1 0 0 230
## 9 0 0 0 1 0 1 73
## 10 0 0 0 1 1 0 52
## 11 0 0 0 1 1 1 42
## 12 0 0 0 1 2 0 17
## 13 0 0 0 2 0 0 16
## 14 0 0 0 2 0 1 9
## 15 0 0 0 2 1 0 6
## 16 0 0 1 0 0 0 380
## 17 0 0 1 0 0 1 90
## 18 0 0 1 0 1 0 46
## 19 0 0 1 0 1 1 62
## 20 0 0 1 0 2 0 9
## 21 0 0 1 1 0 0 39
## 22 0 0 1 1 0 1 24
## 23 0 0 1 1 1 0 28
## 24 0 0 1 2 0 0 10
## 25 0 0 2 0 0 0 24
## 26 0 0 2 0 0 1 6
## 27 0 0 2 0 1 0 5
## 28 0 0 2 1 0 0 3
## 29 0 0 3 0 0 0 2
## 30 0 1 0 0 0 0 33
## 31 1 0 0 0 0 0 5552
##
## *** METAR records where no clouds were extracted ***
## [1] "KLNK 301854Z 17011KT 10SM 30/18 A2993 RMK AO2 SLP124 T03000178 $"
## [2] "KLNK 011454Z 21/10 A3008 RMK AO2 SLP178 T02110100 51011 $"
## [3] "KLNK 261554Z 05006KT 27/18 A3013 RMK AO2 SLP190 T02670178 RVRNO $"
## [4] "KLNK 161554Z 09009KT 10SM 26/19 A3007 RMK AO2 SLP171 T02610194 $"
## [5] "KLNK 211954Z 25008KT 210V270 10SM 27/10 A3005 RMK AO2 SLP170 T02720100 $"
## [6] "KLNK 050854Z 00000KT 1/2SM R36/3000VP6000FT FG 09/09 A2978 RMK AO2 SLP081 T00940094 51008 $"
##
## *** Dimensions for the cloud matrix ***
## [1] 8813 11
## List of 2
## $ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 7 variables:
## ..$ isCLR : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
## ..$ isVV : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ htVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ numFEW: int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
## ..$ numSCT: int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
## ..$ numBKN: int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ numOVC: int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
## $ mtxCloud : chr [1:8813, 1:11] "" "" "" "" ...
##
## *** Lowest clouds by type tibble ***
## # A tibble: 8,813 x 5
## lowVV lowOVC lowBKN lowSCT lowFEW
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NA 2800 NA NA NA
## 2 NA 2700 NA NA NA
## 3 NA 2600 NA NA NA
## 4 NA 2700 NA NA NA
## 5 NA 2700 NA 2100 NA
## 6 NA 2700 NA NA NA
## 7 NA 2700 NA NA NA
## 8 NA 2700 NA NA NA
## 9 NA NA NA NA 2600
## 10 NA NA NA NA NA
## # ... with 8,803 more rows
## List of 3
## $ lowCloud :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 5 variables:
## ..$ lowVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ lowOVC: num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
## ..$ lowBKN: num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ lowSCT: num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
## ..$ lowFEW: num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
## $ minCeilingLevel: num [1:8813] 2800 2700 2600 2700 2700 ...
## $ minCloudLevel : num [1:8813] 2800 2700 2600 2700 2100 ...
## Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 30 variables:
## $ METAR : chr "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## $ WindDir : chr "300" "000" "000" "280" ...
## $ WindSpeed : int 5 0 0 3 5 9 0 3 0 0 ...
## $ WindGust : num NA NA NA NA NA NA NA NA NA NA ...
## $ Dummy : chr " " " " " " " " ...
## $ Visibility: num 10 10 10 10 10 10 10 10 10 10 ...
## $ TempC : int -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
## $ DewC : int -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
## $ Altimeter : int 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
## $ SLP : int 275 277 277 281 286 289 290 291 295 295 ...
## $ FahrC : chr "T10281067" "T10331067" "T10281067" "T10281061" ...
## $ dtime : POSIXct, format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
## $ origMETAR : chr "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## $ TempF : num 27 26.1 27 27 27 ...
## $ DewF : num 19.9 19.9 19.9 21 19.9 ...
## $ modSLP : num 1028 1028 1028 1028 1029 ...
## $ isCLR : num 0 0 0 0 0 0 0 0 0 1 ...
## $ isVV : num 0 0 0 0 0 0 0 0 0 0 ...
## $ htVV : num NA NA NA NA NA NA NA NA NA NA ...
## $ numFEW : int 0 0 0 0 0 0 0 0 1 0 ...
## $ numSCT : int 0 0 0 0 1 0 0 0 0 0 ...
## $ numBKN : int 0 0 0 0 0 0 0 0 0 0 ...
## $ numOVC : int 1 1 1 1 1 1 1 1 0 0 ...
## $ lowVV : num NA NA NA NA NA NA NA NA NA NA ...
## $ lowOVC : num 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
## $ lowBKN : num NA NA NA NA NA NA NA NA NA NA ...
## $ lowSCT : num NA NA NA NA 2100 NA NA NA NA NA ...
## $ lowFEW : num NA NA NA NA NA NA NA NA 2600 NA ...
## $ wType : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
## $ month : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
str(klnk2016METAR)
## List of 8
## $ fullMETAR :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 30 variables:
## ..$ METAR : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## ..$ WindDir : chr [1:8813] "300" "000" "000" "280" ...
## ..$ WindSpeed : int [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
## ..$ WindGust : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ Dummy : chr [1:8813] " " " " " " " " ...
## ..$ Visibility: num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
## ..$ TempC : int [1:8813] -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
## ..$ DewC : int [1:8813] -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
## ..$ Altimeter : int [1:8813] 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
## ..$ SLP : int [1:8813] 275 277 277 281 286 289 290 291 295 295 ...
## ..$ FahrC : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
## ..$ dtime : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
## ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## ..$ TempF : num [1:8813] 27 26.1 27 27 27 ...
## ..$ DewF : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
## ..$ modSLP : num [1:8813] 1028 1028 1028 1028 1029 ...
## ..$ isCLR : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
## ..$ isVV : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ htVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ numFEW : int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
## ..$ numSCT : int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
## ..$ numBKN : int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
## ..$ numOVC : int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
## ..$ lowVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ lowOVC : num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
## ..$ lowBKN : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ lowSCT : num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
## ..$ lowFEW : num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
## ..$ wType : Factor w/ 7 levels "VV","OVC","BKN",..: 2 2 2 2 2 2 2 2 5 6 ...
## ..$ month : Factor w/ 12 levels "Jan","Feb","Mar",..: 12 12 12 12 12 12 12 12 12 12 ...
## $ funcMETAR :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 29 variables:
## ..$ station : chr [1:8813] "LNK" "LNK" "LNK" "LNK" ...
## ..$ valid : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
## ..$ tmpf : num [1:8813] 27 26.1 27 27 27 ...
## ..$ dwpf : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
## ..$ relh : num [1:8813] 74.5 77.3 74.5 78 74.5 ...
## ..$ drct : num [1:8813] 300 0 0 280 310 10 0 10 0 0 ...
## ..$ sknt : num [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
## ..$ p01i : chr [1:8813] "0.00" "0.00" "0.00" "0.00" ...
## ..$ alti : num [1:8813] 30.3 30.3 30.3 30.3 30.3 ...
## ..$ mslp : num [1:8813] 1028 1028 1028 1028 1029 ...
## ..$ vsby : num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
## ..$ gust : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ skyc1 : chr [1:8813] "OVC" "OVC" "OVC" "OVC" ...
## ..$ skyc2 : chr [1:8813] NA NA NA NA ...
## ..$ skyc3 : chr [1:8813] NA NA NA NA ...
## ..$ skyc4 : logi [1:8813] NA NA NA NA NA NA ...
## ..$ skyl1 : num [1:8813] 2800 2700 2600 2700 2100 2700 2700 2700 2600 NA ...
## ..$ skyl2 : num [1:8813] NA NA NA NA 2700 NA NA NA NA NA ...
## ..$ skyl3 : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ skyl4 : logi [1:8813] NA NA NA NA NA NA ...
## ..$ wxcodes : chr [1:8813] NA NA NA NA ...
## ..$ ice_accretion_1hr: chr [1:8813] NA NA NA NA ...
## ..$ ice_accretion_3hr: chr [1:8813] NA NA NA NA ...
## ..$ ice_accretion_6hr: chr [1:8813] NA NA NA NA ...
## ..$ peak_wind_gust : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ peak_wind_drct : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ peak_wind_time : POSIXct[1:8813], format: NA NA ...
## ..$ feel : num [1:8813] 20.4 26.1 27 22.9 20.4 ...
## ..$ metar : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## ..- attr(*, "spec")=
## .. .. cols(
## .. .. station = col_character(),
## .. .. valid = col_datetime(format = ""),
## .. .. tmpf = col_double(),
## .. .. dwpf = col_double(),
## .. .. relh = col_double(),
## .. .. drct = col_double(),
## .. .. sknt = col_double(),
## .. .. p01i = col_character(),
## .. .. alti = col_double(),
## .. .. mslp = col_double(),
## .. .. vsby = col_double(),
## .. .. gust = col_double(),
## .. .. skyc1 = col_character(),
## .. .. skyc2 = col_character(),
## .. .. skyc3 = col_character(),
## .. .. skyc4 = col_logical(),
## .. .. skyl1 = col_double(),
## .. .. skyl2 = col_double(),
## .. .. skyl3 = col_double(),
## .. .. skyl4 = col_logical(),
## .. .. wxcodes = col_character(),
## .. .. ice_accretion_1hr = col_character(),
## .. .. ice_accretion_3hr = col_character(),
## .. .. ice_accretion_6hr = col_character(),
## .. .. peak_wind_gust = col_double(),
## .. .. peak_wind_drct = col_double(),
## .. .. peak_wind_time = col_datetime(format = ""),
## .. .. feel = col_double(),
## .. .. metar = col_character()
## .. .. )
## $ windMETAR :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 32 variables:
## ..$ station : chr [1:8813] "LNK" "LNK" "LNK" "LNK" ...
## ..$ valid : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
## ..$ tmpf : num [1:8813] 27 26.1 27 27 27 ...
## ..$ dwpf : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
## ..$ relh : num [1:8813] 74.5 77.3 74.5 78 74.5 ...
## ..$ drct : num [1:8813] 300 0 0 280 310 10 0 10 0 0 ...
## ..$ sknt : num [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
## ..$ p01i : chr [1:8813] "0.00" "0.00" "0.00" "0.00" ...
## ..$ alti : num [1:8813] 30.3 30.3 30.3 30.3 30.3 ...
## ..$ mslp : num [1:8813] 1028 1028 1028 1028 1029 ...
## ..$ vsby : num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
## ..$ gust : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ skyc1 : chr [1:8813] "OVC" "OVC" "OVC" "OVC" ...
## ..$ skyc2 : chr [1:8813] NA NA NA NA ...
## ..$ skyc3 : chr [1:8813] NA NA NA NA ...
## ..$ skyc4 : logi [1:8813] NA NA NA NA NA NA ...
## ..$ skyl1 : num [1:8813] 2800 2700 2600 2700 2100 2700 2700 2700 2600 NA ...
## ..$ skyl2 : num [1:8813] NA NA NA NA 2700 NA NA NA NA NA ...
## ..$ skyl3 : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ skyl4 : logi [1:8813] NA NA NA NA NA NA ...
## ..$ wxcodes : chr [1:8813] NA NA NA NA ...
## ..$ ice_accretion_1hr: chr [1:8813] NA NA NA NA ...
## ..$ ice_accretion_3hr: chr [1:8813] NA NA NA NA ...
## ..$ ice_accretion_6hr: chr [1:8813] NA NA NA NA ...
## ..$ peak_wind_gust : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ peak_wind_drct : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ peak_wind_time : POSIXct[1:8813], format: NA NA ...
## ..$ feel : num [1:8813] 20.4 26.1 27 22.9 20.4 ...
## ..$ metar : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## ..$ dirW : chr [1:8813] "300" "000" "000" "280" ...
## ..$ spdW : num [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
## ..$ gustW : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## $ initMETAR :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 13 variables:
## ..$ METAR : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## ..$ WindDir : chr [1:8813] "300" "000" "000" "280" ...
## ..$ WindSpeed : chr [1:8813] "05" "00" "00" "03" ...
## ..$ WindGust : chr [1:8813] NA NA NA NA ...
## ..$ Dummy : chr [1:8813] " " " " " " " " ...
## ..$ Visibility: chr [1:8813] "10SM" "10SM" "10SM" "10SM" ...
## ..$ TempC : chr [1:8813] "M03" "M03" "M03" "M03" ...
## ..$ DewC : chr [1:8813] "M07" "M07" "M07" "M06" ...
## ..$ Altimeter : chr [1:8813] "A3029" "A3030" "A3030" "A3031" ...
## ..$ SLP : chr [1:8813] "SLP275" "SLP277" "SLP277" "SLP281" ...
## ..$ FahrC : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
## ..$ dtime : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
## ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## $ convMETAR :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 15 variables:
## ..$ METAR : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## ..$ WindDir : chr [1:8813] "300" "000" "000" "280" ...
## ..$ WindSpeed : int [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
## ..$ WindGust : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ Dummy : chr [1:8813] " " " " " " " " ...
## ..$ Visibility: num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
## ..$ TempC : int [1:8813] -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
## ..$ DewC : int [1:8813] -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
## ..$ Altimeter : int [1:8813] 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
## ..$ SLP : int [1:8813] 275 277 277 281 286 289 290 291 295 295 ...
## ..$ FahrC : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
## ..$ dtime : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
## ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## ..$ TempF : num [1:8813] 27 26.1 27 27 27 ...
## ..$ DewF : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
## $ parseMETAR :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 16 variables:
## ..$ METAR : chr [1:8813] "54Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "54Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "54Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067" "54Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## ..$ WindDir : chr [1:8813] "300" "000" "000" "280" ...
## ..$ WindSpeed : int [1:8813] 5 0 0 3 5 9 0 3 0 0 ...
## ..$ WindGust : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## ..$ Dummy : chr [1:8813] " " " " " " " " ...
## ..$ Visibility: num [1:8813] 10 10 10 10 10 10 10 10 10 10 ...
## ..$ TempC : int [1:8813] -3 -3 -3 -3 -3 -6 -7 -8 -10 -9 ...
## ..$ DewC : int [1:8813] -7 -7 -7 -6 -7 -10 -11 -11 -12 -12 ...
## ..$ Altimeter : int [1:8813] 3029 3030 3030 3031 3033 3033 3034 3034 3034 3035 ...
## ..$ SLP : int [1:8813] 275 277 277 281 286 289 290 291 295 295 ...
## ..$ FahrC : chr [1:8813] "T10281067" "T10331067" "T10281067" "T10281061" ...
## ..$ dtime : POSIXct[1:8813], format: "2015-12-31 00:54:00" "2015-12-31 01:54:00" ...
## ..$ origMETAR : chr [1:8813] "KLNK 310054Z 30005KT 10SM OVC028 M03/M07 A3029 RMK AO2 SLP275 T10281067" "KLNK 310154Z 00000KT 10SM OVC027 M03/M07 A3030 RMK AO2 SLP277 T10331067" "KLNK 310254Z 00000KT 10SM OVC026 M03/M07 A3030 RMK AO2 SLP277 T10281067 51008" "KLNK 310354Z 28003KT 10SM OVC027 M03/M06 A3031 RMK AO2 SLP281 T10281061" ...
## ..$ TempF : num [1:8813] 27 26.1 27 27 27 ...
## ..$ DewF : num [1:8813] 19.9 19.9 19.9 21 19.9 ...
## ..$ modSLP : num [1:8813] 1028 1028 1028 1028 1029 ...
## $ initClouds :List of 2
## ..$ tblClouds:Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 7 variables:
## .. ..$ isCLR : num [1:8813] 0 0 0 0 0 0 0 0 0 1 ...
## .. ..$ isVV : num [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
## .. ..$ htVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## .. ..$ numFEW: int [1:8813] 0 0 0 0 0 0 0 0 1 0 ...
## .. ..$ numSCT: int [1:8813] 0 0 0 0 1 0 0 0 0 0 ...
## .. ..$ numBKN: int [1:8813] 0 0 0 0 0 0 0 0 0 0 ...
## .. ..$ numOVC: int [1:8813] 1 1 1 1 1 1 1 1 0 0 ...
## ..$ mtxCloud : chr [1:8813, 1:11] "" "" "" "" ...
## $ processedClouds:List of 3
## ..$ lowCloud :Classes 'tbl_df', 'tbl' and 'data.frame': 8813 obs. of 5 variables:
## .. ..$ lowVV : num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## .. ..$ lowOVC: num [1:8813] 2800 2700 2600 2700 2700 2700 2700 2700 NA NA ...
## .. ..$ lowBKN: num [1:8813] NA NA NA NA NA NA NA NA NA NA ...
## .. ..$ lowSCT: num [1:8813] NA NA NA NA 2100 NA NA NA NA NA ...
## .. ..$ lowFEW: num [1:8813] NA NA NA NA NA NA NA NA 2600 NA ...
## ..$ minCeilingLevel: num [1:8813] 2800 2700 2600 2700 2700 ...
## ..$ minCloudLevel : num [1:8813] 2800 2700 2600 2700 2100 ...